[rh7,1/8] ve/net/bridge: make net.bridge.* sysctl visible in Containers (r/o)

Submitted by Konstantin Khorenko on Feb. 21, 2020, 4:07 p.m.

Details

Message ID 20200221160731.16888-2-khorenko@virtuozzo.com
State New
Series "enable running Kubernetes inside a Container"
Headers show

Commit Message

Konstantin Khorenko Feb. 21, 2020, 4:07 p.m.
Kubernetes performs some prechecks before it runs; in particular, it requires
the "net.bridge.bridge-nf-call-ip[6]tables" sysctls to be enabled.

Thus let's make all "net.bridge.*" sysctls visible in Containers,
but in read-only mode (as they are not virtualized).

The implementation is more than the minimum needed to achieve this goal,
but it was chosen for 2 reasons:

1) it's now similar to the netfilter sysctls implementation (although
   netfilter sysctls are fully virtualized)

2) if we ever have to fully virtualize bridge netfilter sysctls,
   we won't have to rewrite the code completely again.

https://jira.sw.ru/browse/PSBM-92107

Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
---
 include/net/netns/conntrack.h   |   1 +
 net/bridge/br_netfilter_hooks.c | 107 ++++++++++++++++++++++++++++++++++------
 2 files changed, 93 insertions(+), 15 deletions(-)

Patch hide | download patch | download mbox

diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index f51251d222d63..02c344a7b31d0 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -119,6 +119,7 @@  struct netns_ct {
 	struct ctl_table_header	*tstamp_sysctl_header;
 	struct ctl_table_header	*event_sysctl_header;
 	struct ctl_table_header	*helper_sysctl_header;
+	struct ctl_table_header	*brnf_sysctl_header;
 #endif
 	char			*slabname;
 	unsigned int		sysctl_log_invalid; /* Log invalid packets */
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index da60ef04fa0c5..d34e20f282965 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -45,7 +45,6 @@ 
 #endif
 
 #ifdef CONFIG_SYSCTL
-static struct ctl_table_header *brnf_sysctl_header;
 static int brnf_call_iptables __read_mostly = 1;
 static int brnf_call_ip6tables __read_mostly = 1;
 static int brnf_call_arptables __read_mostly = 1;
@@ -1004,35 +1003,113 @@  static struct ctl_table brnf_table[] = {
 };
 #endif
 
+#ifdef CONFIG_SYSCTL
+static int br_netfilter_init_sysctl(struct net *net)
+{
+	struct ctl_table *table;
+	int num_entries, i;
+
+	table = kmemdup(brnf_table, sizeof(brnf_table), GFP_KERNEL);
+	if (!table)
+		goto out_kmemdup;
+
+	/*
+	 * Bridge netfilter sysctls are not virtualized, show them in RO mode
+	 * in non-init netns.
+	 */
+	if (!net_eq(net, &init_net)) {
+		num_entries = sizeof(brnf_table) / sizeof(struct ctl_table);
+		for (i = 0; i < num_entries; i++)
+			table[i].mode = 0444;
+	}
+
+	/* Don't export sysctls to unprivileged users */
+	if (ve_net_hide_sysctl(net))
+		table[0].procname = NULL;
+
+	net->ct.brnf_sysctl_header = register_net_sysctl(net, "net/bridge",
+							 table);
+	if (!net->ct.brnf_sysctl_header)
+		goto out_unregister_netfilter;
+
+	return 0;
+
+out_unregister_netfilter:
+	kfree(table);
+out_kmemdup:
+	return -ENOMEM;
+}
+
+static void br_netfilter_fini_sysctl(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = net->ct.brnf_sysctl_header->ctl_table_arg;
+	unregister_net_sysctl_table(net->ct.brnf_sysctl_header);
+	kfree(table);
+}
+#else
+static int br_netfilter_init_sysctl(struct net *net)
+{
+	return 0;
+}
+
+static void br_netfilter_fini_sysctl(struct net *net)
+{
+}
+#endif /* CONFIG_SYSCTL */
+
+static int br_netfilter_pernet_init(struct net *net)
+{
+	int ret;
+
+	ret = br_netfilter_init_sysctl(net);
+	return ret;
+}
+
+static void br_netfilter_pernet_exit(struct list_head *net_exit_list)
+{
+	struct net *net;
+
+	list_for_each_entry(net, net_exit_list, exit_list) {
+		br_netfilter_fini_sysctl(net);
+	}
+}
+
+static struct pernet_operations br_netfilter_net_ops = {
+	.init		= br_netfilter_pernet_init,
+	.exit_batch	= br_netfilter_pernet_exit,
+};
+
 static int __init br_netfilter_init(void)
 {
 	int ret;
 
 	ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
 	if (ret < 0)
-		return ret;
+		goto out_start;
+
+	ret = register_pernet_subsys(&br_netfilter_net_ops);
+	if (ret < 0)
+		goto out_pernet;
 
-#ifdef CONFIG_SYSCTL
-	brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table);
-	if (brnf_sysctl_header == NULL) {
-		printk(KERN_WARNING
-		       "br_netfilter: can't register to sysctl.\n");
-		nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
-		return -ENOMEM;
-	}
-#endif
 	RCU_INIT_POINTER(nf_br_ops, &br_ops);
 	printk(KERN_NOTICE "Bridge firewalling registered\n");
+
 	return 0;
+
+out_pernet:
+	printk(KERN_WARNING "br_netfilter: can't register to sysctl.\n");
+	nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
+out_start:
+	return ret;
 }
 
 static void __exit br_netfilter_fini(void)
 {
-	RCU_INIT_POINTER(nf_br_ops, NULL);
+	unregister_pernet_subsys(&br_netfilter_net_ops);
 	nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
-#ifdef CONFIG_SYSCTL
-	unregister_net_sysctl_table(brnf_sysctl_header);
-#endif
+	RCU_INIT_POINTER(nf_br_ops, NULL);
 }
 
 module_init(br_netfilter_init);

Comments

Pavel Tikhomirov Feb. 28, 2020, 10:37 a.m.
On 2/21/20 7:07 PM, Konstantin Khorenko wrote:
> Kubernetes performs some prechecks before it runs; in particular, it requires
> the "net.bridge.bridge-nf-call-ip[6]tables" sysctls to be enabled.
> 
> Thus let's make all "net.bridge.*" sysctls visible in Containers,
> but in read-only mode (as they are not virtualized).
> 
> The implementation is more than the minimum needed to achieve this goal,
> but it was chosen for 2 reasons:
> 
> 1) it's now similar to the netfilter sysctls implementation (although
>     netfilter sysctls are fully virtualized)
> 
> 2) if we ever have to fully virtualize bridge netfilter sysctls,
>     we won't have to rewrite the code completely again.
> 
> https://jira.sw.ru/browse/PSBM-92107
> 
> Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
> ---
>   include/net/netns/conntrack.h   |   1 +
>   net/bridge/br_netfilter_hooks.c | 107 ++++++++++++++++++++++++++++++++++------
>   2 files changed, 93 insertions(+), 15 deletions(-)
> 
> diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
> index f51251d222d63..02c344a7b31d0 100644
> --- a/include/net/netns/conntrack.h
> +++ b/include/net/netns/conntrack.h
> @@ -119,6 +119,7 @@ struct netns_ct {
>   	struct ctl_table_header	*tstamp_sysctl_header;
>   	struct ctl_table_header	*event_sysctl_header;
>   	struct ctl_table_header	*helper_sysctl_header;
> +	struct ctl_table_header	*brnf_sysctl_header;

Isn't this netns_ct for /proc/sys/net/netfilter/nf_conntrack* sysctls only?

>   #endif
>   	char			*slabname;
>   	unsigned int		sysctl_log_invalid; /* Log invalid packets */
> diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
> index da60ef04fa0c5..d34e20f282965 100644
> --- a/net/bridge/br_netfilter_hooks.c
> +++ b/net/bridge/br_netfilter_hooks.c
> @@ -45,7 +45,6 @@
>   #endif
>   
>   #ifdef CONFIG_SYSCTL
> -static struct ctl_table_header *brnf_sysctl_header;
>   static int brnf_call_iptables __read_mostly = 1;
>   static int brnf_call_ip6tables __read_mostly = 1;
>   static int brnf_call_arptables __read_mostly = 1;
> @@ -1004,35 +1003,113 @@ static struct ctl_table brnf_table[] = {
>   };
>   #endif
>   
> +#ifdef CONFIG_SYSCTL
> +static int br_netfilter_init_sysctl(struct net *net)
> +{
> +	struct ctl_table *table;
> +	int num_entries, i;
> +
> +	table = kmemdup(brnf_table, sizeof(brnf_table), GFP_KERNEL);
> +	if (!table)
> +		goto out_kmemdup;
> +
> +	/*
> +	 * Bridge netfilter sysctls are not virtualized, show them in RO mode
> +	 * in non-init netns.
> +	 */
> +	if (!net_eq(net, &init_net)) {
> +		num_entries = sizeof(brnf_table) / sizeof(struct ctl_table);
> +		for (i = 0; i < num_entries; i++)
> +			table[i].mode = 0444;
> +	}
> +
> +	/* Don't export sysctls to unprivileged users */
> +	if (ve_net_hide_sysctl(net))
> +		table[0].procname = NULL;
> +
> +	net->ct.brnf_sysctl_header = register_net_sysctl(net, "net/bridge",
> +							 table);
> +	if (!net->ct.brnf_sysctl_header)
> +		goto out_unregister_netfilter;
> +
> +	return 0;
> +
> +out_unregister_netfilter:
> +	kfree(table);
> +out_kmemdup:
> +	return -ENOMEM;
> +}
> +
> +static void br_netfilter_fini_sysctl(struct net *net)
> +{
> +	struct ctl_table *table;
> +
> +	table = net->ct.brnf_sysctl_header->ctl_table_arg;
> +	unregister_net_sysctl_table(net->ct.brnf_sysctl_header);
> +	kfree(table);
> +}
> +#else
> +static int br_netfilter_init_sysctl(struct net *net)
> +{
> +	return 0;
> +}
> +
> +static void br_netfilter_fini_sysctl(struct net *net)
> +{
> +}
> +#endif /* CONFIG_SYSCTL */
> +
> +static int br_netfilter_pernet_init(struct net *net)
> +{
> +	int ret;
> +
> +	ret = br_netfilter_init_sysctl(net);
> +	return ret;
> +}
> +
> +static void br_netfilter_pernet_exit(struct list_head *net_exit_list)
> +{
> +	struct net *net;
> +
> +	list_for_each_entry(net, net_exit_list, exit_list) {
> +		br_netfilter_fini_sysctl(net);
> +	}
> +}
> +
> +static struct pernet_operations br_netfilter_net_ops = {
> +	.init		= br_netfilter_pernet_init,
> +	.exit_batch	= br_netfilter_pernet_exit,
> +};
> +
>   static int __init br_netfilter_init(void)
>   {
>   	int ret;
>   
>   	ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
>   	if (ret < 0)
> -		return ret;
> +		goto out_start;
> +
> +	ret = register_pernet_subsys(&br_netfilter_net_ops);
> +	if (ret < 0)
> +		goto out_pernet;
>   
> -#ifdef CONFIG_SYSCTL
> -	brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table);
> -	if (brnf_sysctl_header == NULL) {
> -		printk(KERN_WARNING
> -		       "br_netfilter: can't register to sysctl.\n");
> -		nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
> -		return -ENOMEM;
> -	}
> -#endif
>   	RCU_INIT_POINTER(nf_br_ops, &br_ops);
>   	printk(KERN_NOTICE "Bridge firewalling registered\n");
> +
>   	return 0;
> +
> +out_pernet:
> +	printk(KERN_WARNING "br_netfilter: can't register to sysctl.\n");
> +	nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
> +out_start:
> +	return ret;
>   }
>   
>   static void __exit br_netfilter_fini(void)
>   {
> -	RCU_INIT_POINTER(nf_br_ops, NULL);
> +	unregister_pernet_subsys(&br_netfilter_net_ops);
>   	nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
> -#ifdef CONFIG_SYSCTL
> -	unregister_net_sysctl_table(brnf_sysctl_header);
> -#endif
> +	RCU_INIT_POINTER(nf_br_ops, NULL);
>   }
>   
>   module_init(br_netfilter_init);
>