[Devel,RH7,2/2] ve/net: restrict number of net devices for CT

Submitted by Pavel Tikhomirov on March 20, 2017, 3:09 p.m.

Details

Message ID 20170320150934.23046-2-ptikhomirov@virtuozzo.com
State New
Series "Series without cover letter"
Headers show

Commit Message

Pavel Tikhomirov March 20, 2017, 3:09 p.m.
Description from Solar:
"Adding thousands of network interfaces (aliases) in a container causes
moderately slow container stop times (1 minute observed) with many
kernel messages like "stop ploop26375 failed (holders=1)" appearing.
A safety limit on the number of network interfaces in a container
should be introduced by default, especially considering that there
might be (or might be introduced later) ways to add network interfaces
faster (thereby making the attack more powerful and quicker to carry
out)."

In default_device_exit_batch we call dev->rtnl_link_ops->dellink for
each link on cleanup_net. So more links in CT means it's slower stop.
On number of network(veth) interfaces ~10000, CT stop time is ~1min.

Make netif_avail_nr/netif_max_nr pair on ve cgroup to restrict number
of interfaces in CT anologious to netns limit. With default 256 network
devices (most of them are veth pairs) containter stop time will be ~3sec
(~1sec for removing network interfaces).

https://jira.sw.ru/browse/PSBM-51354
Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
---
 include/linux/ve.h |  3 +++
 kernel/ve/ve.c     | 28 ++++++++++++++++++++++++++++
 net/core/dev.c     | 17 +++++++++++++++--
 3 files changed, 46 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/include/linux/ve.h b/include/linux/ve.h
index edff7e4..708c6d3 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -120,6 +120,8 @@  struct ve_struct {
 #endif
 	atomic_t		netns_avail_nr;
 	int			netns_max_nr;
+	atomic_t		netif_avail_nr;
+	int			netif_max_nr;
 	/* Number of mounts. May become unbalanced if VE0 mounts something
 	 * and the VE unmounts it. This is acceptable.
 	 */
@@ -138,6 +140,7 @@  struct ve_devmnt {
 };
 
 #define NETNS_MAX_NR_DEFAULT	256	/* number of net-namespaces per-VE */
+#define NETIF_MAX_NR_DEFAULT	256	/* number of net-interfaces per-VE */
 
 #define VE_MEMINFO_DEFAULT      1       /* default behaviour */
 #define VE_MEMINFO_SYSTEM       0       /* disable meminfo virtualization */
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 67b7882..a17b048 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -81,6 +81,8 @@  struct ve_struct ve0 = {
 	.mnt_nr			= 0,
 	.netns_avail_nr		= ATOMIC_INIT(INT_MAX),
 	.netns_max_nr		= INT_MAX,
+	.netif_avail_nr		= ATOMIC_INIT(INT_MAX),
+	.netif_max_nr		= INT_MAX,
 };
 EXPORT_SYMBOL(ve0);
 
@@ -653,6 +655,9 @@  static struct cgroup_subsys_state *ve_create(struct cgroup *cg)
 	atomic_set(&ve->netns_avail_nr, NETNS_MAX_NR_DEFAULT);
 	ve->netns_max_nr = NETNS_MAX_NR_DEFAULT;
 
+	atomic_set(&ve->netif_avail_nr, NETIF_MAX_NR_DEFAULT);
+	ve->netif_max_nr = NETIF_MAX_NR_DEFAULT;
+
 do_init:
 	init_rwsem(&ve->op_sem);
 	INIT_LIST_HEAD(&ve->devices);
@@ -1191,6 +1196,8 @@  enum {
 	VE_CF_PID_MAX,
 	VE_CF_NETNS_MAX_NR,
 	VE_CF_NETNS_NR,
+	VE_CF_NETIF_MAX_NR,
+	VE_CF_NETIF_NR,
 };
 
 static int ve_ts_read(struct cgroup *cg, struct cftype *cft, struct seq_file *m)
@@ -1260,6 +1267,10 @@  static u64 ve_read_u64(struct cgroup *cg, struct cftype *cft)
 		return cgroup_ve(cg)->netns_max_nr;
 	else if (cft->private == VE_CF_NETNS_NR)
 		return atomic_read(&cgroup_ve(cg)->netns_avail_nr);
+	else if (cft->private == VE_CF_NETIF_MAX_NR)
+		return cgroup_ve(cg)->netif_max_nr;
+	else if (cft->private == VE_CF_NETIF_NR)
+		return atomic_read(&cgroup_ve(cg)->netif_avail_nr);
 	return 0;
 }
 
@@ -1346,6 +1357,11 @@  static int _ve_write_u64(struct cgroup *cg, struct cftype *cft,
 
 		ve->netns_max_nr = value;
 		atomic_add(delta, &ve->netns_avail_nr);
+	} else if (cft->private == VE_CF_NETIF_MAX_NR) {
+		int delta = value - ve->netif_max_nr;
+
+		ve->netif_max_nr = value;
+		atomic_add(delta, &ve->netif_avail_nr);
 	}
 	up_write(&ve->op_sem);
 	return 0;
@@ -1451,6 +1467,18 @@  static struct cftype ve_cftypes[] = {
 		.read_u64		= ve_read_u64,
 		.private		= VE_CF_NETNS_NR,
 	},
+	{
+		.name			= "netif_max_nr",
+		.flags			= CFTYPE_NOT_ON_ROOT,
+		.read_u64		= ve_read_u64,
+		.write_u64		= ve_write_u64,
+		.private		= VE_CF_NETIF_MAX_NR,
+	},
+	{
+		.name			= "netif_avail_nr",
+		.read_u64		= ve_read_u64,
+		.private		= VE_CF_NETIF_NR,
+	},
 	{ }
 };
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 40e5a97..12de79c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6605,6 +6605,10 @@  int register_netdevice(struct net_device *dev)
 	if (!ve_is_super(net->owner_ve) && ve_is_dev_movable(dev))
 		goto out;
 
+	ret = -ENOMEM;
+	if (atomic_dec_if_positive(&net->owner_ve->netif_avail_nr) < 0)
+		goto out;
+
 	spin_lock_init(&dev->addr_list_lock);
 	netdev_set_addr_lockdep_class(dev);
 
@@ -6612,7 +6616,7 @@  int register_netdevice(struct net_device *dev)
 
 	ret = dev_get_valid_name(net, dev, dev->name);
 	if (ret < 0)
-		goto out;
+		goto err_avail;
 
 	/* Init, if this function is available */
 	if (dev->netdev_ops->ndo_init) {
@@ -6620,7 +6624,7 @@  int register_netdevice(struct net_device *dev)
 		if (ret) {
 			if (ret > 0)
 				ret = -EIO;
-			goto out;
+			goto err_avail;
 		}
 	}
 
@@ -6734,6 +6738,8 @@  int register_netdevice(struct net_device *dev)
 err_uninit:
 	if (dev->netdev_ops->ndo_uninit)
 		dev->netdev_ops->ndo_uninit(dev);
+err_avail:
+	atomic_inc(&net->owner_ve->netif_avail_nr);
 	goto out;
 }
 EXPORT_SYMBOL(register_netdevice);
@@ -7011,6 +7017,8 @@  void netdev_run_todo(void)
 		WARN_ON(rcu_access_pointer(dev->ip6_ptr));
 		WARN_ON(dev->dn_ptr);
 
+		atomic_inc(&dev_net(dev)->owner_ve->netif_avail_nr);
+
 		/* It must be the very last action,
 		 * after this 'dev' may point to freed up memory.
 		 */
@@ -7415,6 +7423,11 @@  int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 			goto out;
 	}
 
+	err = -ENOMEM;
+	if (atomic_dec_if_positive(&net->owner_ve->netif_avail_nr) < 0)
+		goto out;
+	atomic_inc(&dev_net(dev)->owner_ve->netif_avail_nr);
+
 	/*
 	 * And now a mini version of register_netdevice unregister_netdevice.
 	 */

Comments

Andrey Vagin March 28, 2017, 10:06 p.m.
Acked-by: Andrew Vagin <avagin@virtuozzo.com>

On Mon, Mar 20, 2017 at 06:09:34PM +0300, Pavel Tikhomirov wrote:
> Description from Solar:
> "Adding thousands of network interfaces (aliases) in a container causes
> moderately slow container stop times (1 minute observed) with many
> kernel messages like "stop ploop26375 failed (holders=1)" appearing.
> A safety limit on the number of network interfaces in a container
> should be introduced by default, especially considering that there
> might be (or might be introduced later) ways to add network interfaces
> faster (thereby making the attack more powerful and quicker to carry
> out)."
> 
> In default_device_exit_batch we call dev->rtnl_link_ops->dellink for
> each link on cleanup_net. So more links in CT means it's slower stop.
> On number of network(veth) interfaces ~10000, CT stop time is ~1min.
> 
> Make netif_avail_nr/netif_max_nr pair on ve cgroup to restrict number
> of interfaces in CT anologious to netns limit. With default 256 network
> devices (most of them are veth pairs) containter stop time will be ~3sec
> (~1sec for removing network interfaces).
> 
> https://jira.sw.ru/browse/PSBM-51354
> Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
> ---
>  include/linux/ve.h |  3 +++
>  kernel/ve/ve.c     | 28 ++++++++++++++++++++++++++++
>  net/core/dev.c     | 17 +++++++++++++++--
>  3 files changed, 46 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/ve.h b/include/linux/ve.h
> index edff7e4..708c6d3 100644
> --- a/include/linux/ve.h
> +++ b/include/linux/ve.h
> @@ -120,6 +120,8 @@ struct ve_struct {
>  #endif
>  	atomic_t		netns_avail_nr;
>  	int			netns_max_nr;
> +	atomic_t		netif_avail_nr;
> +	int			netif_max_nr;
>  	/* Number of mounts. May become unbalanced if VE0 mounts something
>  	 * and the VE unmounts it. This is acceptable.
>  	 */
> @@ -138,6 +140,7 @@ struct ve_devmnt {
>  };
>  
>  #define NETNS_MAX_NR_DEFAULT	256	/* number of net-namespaces per-VE */
> +#define NETIF_MAX_NR_DEFAULT	256	/* number of net-interfaces per-VE */
>  
>  #define VE_MEMINFO_DEFAULT      1       /* default behaviour */
>  #define VE_MEMINFO_SYSTEM       0       /* disable meminfo virtualization */
> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> index 67b7882..a17b048 100644
> --- a/kernel/ve/ve.c
> +++ b/kernel/ve/ve.c
> @@ -81,6 +81,8 @@ struct ve_struct ve0 = {
>  	.mnt_nr			= 0,
>  	.netns_avail_nr		= ATOMIC_INIT(INT_MAX),
>  	.netns_max_nr		= INT_MAX,
> +	.netif_avail_nr		= ATOMIC_INIT(INT_MAX),
> +	.netif_max_nr		= INT_MAX,
>  };
>  EXPORT_SYMBOL(ve0);
>  
> @@ -653,6 +655,9 @@ static struct cgroup_subsys_state *ve_create(struct cgroup *cg)
>  	atomic_set(&ve->netns_avail_nr, NETNS_MAX_NR_DEFAULT);
>  	ve->netns_max_nr = NETNS_MAX_NR_DEFAULT;
>  
> +	atomic_set(&ve->netif_avail_nr, NETIF_MAX_NR_DEFAULT);
> +	ve->netif_max_nr = NETIF_MAX_NR_DEFAULT;
> +
>  do_init:
>  	init_rwsem(&ve->op_sem);
>  	INIT_LIST_HEAD(&ve->devices);
> @@ -1191,6 +1196,8 @@ enum {
>  	VE_CF_PID_MAX,
>  	VE_CF_NETNS_MAX_NR,
>  	VE_CF_NETNS_NR,
> +	VE_CF_NETIF_MAX_NR,
> +	VE_CF_NETIF_NR,
>  };
>  
>  static int ve_ts_read(struct cgroup *cg, struct cftype *cft, struct seq_file *m)
> @@ -1260,6 +1267,10 @@ static u64 ve_read_u64(struct cgroup *cg, struct cftype *cft)
>  		return cgroup_ve(cg)->netns_max_nr;
>  	else if (cft->private == VE_CF_NETNS_NR)
>  		return atomic_read(&cgroup_ve(cg)->netns_avail_nr);
> +	else if (cft->private == VE_CF_NETIF_MAX_NR)
> +		return cgroup_ve(cg)->netif_max_nr;
> +	else if (cft->private == VE_CF_NETIF_NR)
> +		return atomic_read(&cgroup_ve(cg)->netif_avail_nr);
>  	return 0;
>  }
>  
> @@ -1346,6 +1357,11 @@ static int _ve_write_u64(struct cgroup *cg, struct cftype *cft,
>  
>  		ve->netns_max_nr = value;
>  		atomic_add(delta, &ve->netns_avail_nr);
> +	} else if (cft->private == VE_CF_NETIF_MAX_NR) {
> +		int delta = value - ve->netif_max_nr;
> +
> +		ve->netif_max_nr = value;
> +		atomic_add(delta, &ve->netif_avail_nr);
>  	}
>  	up_write(&ve->op_sem);
>  	return 0;
> @@ -1451,6 +1467,18 @@ static struct cftype ve_cftypes[] = {
>  		.read_u64		= ve_read_u64,
>  		.private		= VE_CF_NETNS_NR,
>  	},
> +	{
> +		.name			= "netif_max_nr",
> +		.flags			= CFTYPE_NOT_ON_ROOT,
> +		.read_u64		= ve_read_u64,
> +		.write_u64		= ve_write_u64,
> +		.private		= VE_CF_NETIF_MAX_NR,
> +	},
> +	{
> +		.name			= "netif_avail_nr",
> +		.read_u64		= ve_read_u64,
> +		.private		= VE_CF_NETIF_NR,
> +	},
>  	{ }
>  };
>  
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 40e5a97..12de79c 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -6605,6 +6605,10 @@ int register_netdevice(struct net_device *dev)
>  	if (!ve_is_super(net->owner_ve) && ve_is_dev_movable(dev))
>  		goto out;
>  
> +	ret = -ENOMEM;
> +	if (atomic_dec_if_positive(&net->owner_ve->netif_avail_nr) < 0)
> +		goto out;
> +
>  	spin_lock_init(&dev->addr_list_lock);
>  	netdev_set_addr_lockdep_class(dev);
>  
> @@ -6612,7 +6616,7 @@ int register_netdevice(struct net_device *dev)
>  
>  	ret = dev_get_valid_name(net, dev, dev->name);
>  	if (ret < 0)
> -		goto out;
> +		goto err_avail;
>  
>  	/* Init, if this function is available */
>  	if (dev->netdev_ops->ndo_init) {
> @@ -6620,7 +6624,7 @@ int register_netdevice(struct net_device *dev)
>  		if (ret) {
>  			if (ret > 0)
>  				ret = -EIO;
> -			goto out;
> +			goto err_avail;
>  		}
>  	}
>  
> @@ -6734,6 +6738,8 @@ int register_netdevice(struct net_device *dev)
>  err_uninit:
>  	if (dev->netdev_ops->ndo_uninit)
>  		dev->netdev_ops->ndo_uninit(dev);
> +err_avail:
> +	atomic_inc(&net->owner_ve->netif_avail_nr);
>  	goto out;
>  }
>  EXPORT_SYMBOL(register_netdevice);
> @@ -7011,6 +7017,8 @@ void netdev_run_todo(void)
>  		WARN_ON(rcu_access_pointer(dev->ip6_ptr));
>  		WARN_ON(dev->dn_ptr);
>  
> +		atomic_inc(&dev_net(dev)->owner_ve->netif_avail_nr);
> +
>  		/* It must be the very last action,
>  		 * after this 'dev' may point to freed up memory.
>  		 */
> @@ -7415,6 +7423,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
>  			goto out;
>  	}
>  
> +	err = -ENOMEM;
> +	if (atomic_dec_if_positive(&net->owner_ve->netif_avail_nr) < 0)
> +		goto out;
> +	atomic_inc(&dev_net(dev)->owner_ve->netif_avail_nr);
> +
>  	/*
>  	 * And now a mini version of register_netdevice unregister_netdevice.
>  	 */
> -- 
> 2.9.3
>