[RH7] nat: allow nft NAT and iptables NAT work on the same node

Submitted by Vasily Averin on Dec. 28, 2020, 6:46 a.m.

Details

Message ID 3b7e6479-aef3-dbfc-c31f-44ac289fc91b@virtuozzo.com
State New
Series "nat: allow nft NAT and iptables NAT work on the same node"
Headers show

Commit Message

Vasily Averin Dec. 28, 2020, 6:46 a.m.
The netfilter NAT core cannot deal with more than one NAT hook 
per hook location (prerouting, input ...), because the NAT hooks install
a NAT null binding in case the iptables nat table (iptable_nat hooks)
or the corresponding nftables chain (nft nat hooks) doesn't specify a nat
transformation.

Currently iptables NAT hook is called in all netns, even if according
iptables NAT tables (vpv4 and ipv6 have separate tables) are empty and does nothing.
this block execution of corrsponding nft NAT hook, even if it is present.
This is true in reverted direction: if nft NAT hook was called first it blocks
execution of iptbles nat hook, because corresponding conntrack already have
NAT null binding.

This patch allows nft nat hook to be sure if coresponding NAT kind is enabled
and is in use in current net namespace.

The patch does not allow nft to add new NAT chains if netns already have another one:
either iptables nat or an another nft NAT chain with the same priority.

Patch does not block the loading of the iptables NAT if nft NAT is already present,
because it is quite rare case.

In general this patch allows to work NAT both on host and inside iptables-containers
and inside centos8 containers where nftables nat is only used without any additional
configuration.

https://jira.sw.ru/browse/PSBM-123345
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
---
 include/net/netfilter/nf_nat.h           | 24 +++++++++++++++++-------
 net/ipv4/netfilter/nf_nat_l3proto_ipv4.c |  4 ++--
 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c |  4 ++--
 net/netfilter/core.c                     | 13 ++++++++++++-
 4 files changed, 33 insertions(+), 12 deletions(-)

Patch hide | download patch | download mbox

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index c9ca6ed..4308180 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -79,23 +79,33 @@  static inline bool nf_nat_oif_changed(unsigned int hooknum,
 }
 
 /*
- * Check if nft chain's netns fits conntrack netns.
+ * Check if netns have enabled NAT.
  * Uses ops->is_nft_ops flag to detect nft ops.
- * If ops is not nft-related, the check is considered passed.
+ * If ops is not nft-related we need to be sure
+ * that according ipt nat table is not empty and can be in use.
  */
-#define is_valid_netns(ops, ct) ({					\
+#define netns_nat_check(ops, pf, __net) ({				\
 	const struct nft_chain *__chain;				\
 	const struct net *__chain_net;					\
-	const struct net *__net;					\
 	bool __ret;							\
 									\
 	if (ops->is_nft_ops) {						\
 		__chain = ops->priv;					\
 		__chain_net = read_pnet(&nft_base_chain(__chain)->pnet);\
-		__net = nf_ct_net(ct);					\
 		__ret = net_eq(__net, __chain_net);			\
-	} else								\
-		__ret = true;						\
+	} else {							\
+		struct xt_table_info *__priv = NULL;			\
+		if (pf == NFPROTO_IPV4 &&				\
+		    !IS_ERR_OR_NULL(__net->ipv4.nat_table))		\
+			__priv = __net->ipv4.nat_table->private		\
+		else if (pf == NFPROTO_IPV6 &&				\
+			 !IS_ERR_OR_NULL(__net->ipv6.ip6table_nat))	\
+			__priv = __net->ipv6.ip6table_nat->private;	\
+		if (__priv && __priv->number > __priv->initial_entries)	\
+			__ret = true;					\
+		else							\
+			__ret = false;					\
+	}								\
 	__ret;								\
 })
 #endif
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 3a261e2..a202287 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -292,8 +292,8 @@  nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		if (!nf_nat_initialized(ct, maniptype)) {
 			unsigned int ret;
 
-			/* Ignore nft chains with wrong netns. */
-			if (!is_valid_netns(ops, ct))
+			/* Ignore if nft/ipt NAT is not used in this netns */
+			if (!netns_nat_check(ops, ops->pf, nf_ct_net(ct)))
 				return NF_ACCEPT;
 
 			ret = do_chain(ops, skb, state, ct);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index fdedb60..99a6799 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -305,8 +305,8 @@  nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		if (!nf_nat_initialized(ct, maniptype)) {
 			unsigned int ret;
 
-			/* Ignore nft chains with wrong netns. */
-			if (!is_valid_netns(ops, ct))
+			/* Ignore if nft/ipt NAT is not used in this netns */
+			if (!netns_nat_check(ops, ops->pf, nf_ct_net(ct)))
 				return NF_ACCEPT;
 
 			ret = do_chain(ops, skb, state, ct);
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 4ad3d13..74dee8c 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -64,7 +64,8 @@  EXPORT_SYMBOL(nf_hooks_needed);
 #endif
 
 static DEFINE_MUTEX(nf_hook_mutex);
-
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat.h>
 int nf_register_hook(struct nf_hook_ops *reg)
 {
 	struct nf_hook_ops *elem;
@@ -73,6 +74,16 @@  int nf_register_hook(struct nf_hook_ops *reg)
 	list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
 		if (reg->priority < elem->priority)
 			break;
+		else if ((reg->priority == elem->priority) && reg->is_nft_ops) {
+			const struct nft_chain *c = reg->priv;
+			struct net *net = read_pnet(&nft_base_chain(c)->pnet);
+
+			/* fail if netns already have enabled nft/ipt nat */
+			if (netns_nat_check(elem, reg->pf, net)) {
+				mutex_unlock(&nf_hook_mutex);
+				return -EBUSY;
+			}
+		}
 	}
 	list_add_rcu(&reg->list, elem->list.prev);
 	mutex_unlock(&nf_hook_mutex);

Comments

Konstantin Khorenko Dec. 28, 2020, 12:12 p.m.
On 12/28/2020 09:46 AM, Vasily Averin wrote:
> The netfilter NAT core cannot deal with more than one NAT hook
> per hook location (prerouting, input ...), because the NAT hooks install
> a NAT null binding in case the iptables nat table (iptable_nat hooks)
> or the corresponding nftables chain (nft nat hooks) doesn't specify a nat
> transformation.
>
> Currently iptables NAT hook is called in all netns, even if according
> iptables NAT tables (vpv4 and ipv6 have separate tables) are empty and does nothing.
> this block execution of corrsponding nft NAT hook, even if it is present.
> This is true in reverted direction: if nft NAT hook was called first it blocks
> execution of iptbles nat hook, because corresponding conntrack already have
> NAT null binding.
>
> This patch allows nft nat hook to be sure if coresponding NAT kind is enabled
> and is in use in current net namespace.
>
> The patch does not allow nft to add new NAT chains if netns already have another one:
> either iptables nat or an another nft NAT chain with the same priority.
>
> Patch does not block the loading of the iptables NAT if nft NAT is already present,
> because it is quite rare case.
>
> In general this patch allows to work NAT both on host and inside iptables-containers
> and inside centos8 containers where nftables nat is only used without any additional
> configuration.
>
> https://jira.sw.ru/browse/PSBM-123345
> Signed-off-by: Vasily Averin <vvs@virtuozzo.com>

Reviewed-by: Konstantin Khorenko <khorenko@virtuozzo.com>

> ---
>  include/net/netfilter/nf_nat.h           | 24 +++++++++++++++++-------
>  net/ipv4/netfilter/nf_nat_l3proto_ipv4.c |  4 ++--
>  net/ipv6/netfilter/nf_nat_l3proto_ipv6.c |  4 ++--
>  net/netfilter/core.c                     | 13 ++++++++++++-
>  4 files changed, 33 insertions(+), 12 deletions(-)
>
> diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
> index c9ca6ed..4308180 100644
> --- a/include/net/netfilter/nf_nat.h
> +++ b/include/net/netfilter/nf_nat.h
> @@ -79,23 +79,33 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum,
>  }
>
>  /*
> - * Check if nft chain's netns fits conntrack netns.
> + * Check if netns have enabled NAT.
>   * Uses ops->is_nft_ops flag to detect nft ops.
> - * If ops is not nft-related, the check is considered passed.
> + * If ops is not nft-related we need to be sure
> + * that according ipt nat table is not empty and can be in use.
>   */
> -#define is_valid_netns(ops, ct) ({					\
> +#define netns_nat_check(ops, pf, __net) ({				\
>  	const struct nft_chain *__chain;				\
>  	const struct net *__chain_net;					\
> -	const struct net *__net;					\
>  	bool __ret;							\
>  									\
>  	if (ops->is_nft_ops) {						\
>  		__chain = ops->priv;					\
>  		__chain_net = read_pnet(&nft_base_chain(__chain)->pnet);\
> -		__net = nf_ct_net(ct);					\
>  		__ret = net_eq(__net, __chain_net);			\
> -	} else								\
> -		__ret = true;						\
> +	} else {							\
> +		struct xt_table_info *__priv = NULL;			\
> +		if (pf == NFPROTO_IPV4 &&				\
> +		    !IS_ERR_OR_NULL(__net->ipv4.nat_table))		\
> +			__priv = __net->ipv4.nat_table->private		\
							       ^^^^^

Add ";".
Just for code beauty. :)

> +		else if (pf == NFPROTO_IPV6 &&				\
> +			 !IS_ERR_OR_NULL(__net->ipv6.ip6table_nat))	\
> +			__priv = __net->ipv6.ip6table_nat->private;	\
> +		if (__priv && __priv->number > __priv->initial_entries)	\
> +			__ret = true;					\
> +		else							\
> +			__ret = false;					\
> +	}								\
>  	__ret;								\
>  })
>  #endif
> diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
> index 3a261e2..a202287 100644
> --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
> +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
> @@ -292,8 +292,8 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
>  		if (!nf_nat_initialized(ct, maniptype)) {
>  			unsigned int ret;
>
> -			/* Ignore nft chains with wrong netns. */
> -			if (!is_valid_netns(ops, ct))
> +			/* Ignore if nft/ipt NAT is not used in this netns */
> +			if (!netns_nat_check(ops, ops->pf, nf_ct_net(ct)))
>  				return NF_ACCEPT;
>
>  			ret = do_chain(ops, skb, state, ct);
> diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
> index fdedb60..99a6799 100644
> --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
> +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
> @@ -305,8 +305,8 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
>  		if (!nf_nat_initialized(ct, maniptype)) {
>  			unsigned int ret;
>
> -			/* Ignore nft chains with wrong netns. */
> -			if (!is_valid_netns(ops, ct))
> +			/* Ignore if nft/ipt NAT is not used in this netns */
> +			if (!netns_nat_check(ops, ops->pf, nf_ct_net(ct)))
>  				return NF_ACCEPT;
>
>  			ret = do_chain(ops, skb, state, ct);
> diff --git a/net/netfilter/core.c b/net/netfilter/core.c
> index 4ad3d13..74dee8c 100644
> --- a/net/netfilter/core.c
> +++ b/net/netfilter/core.c
> @@ -64,7 +64,8 @@ EXPORT_SYMBOL(nf_hooks_needed);
>  #endif
>
>  static DEFINE_MUTEX(nf_hook_mutex);
> -
> +#include <net/netfilter/nf_tables.h>
> +#include <net/netfilter/nf_nat.h>
>  int nf_register_hook(struct nf_hook_ops *reg)
>  {
>  	struct nf_hook_ops *elem;
> @@ -73,6 +74,16 @@ int nf_register_hook(struct nf_hook_ops *reg)
>  	list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
>  		if (reg->priority < elem->priority)
>  			break;
> +		else if ((reg->priority == elem->priority) && reg->is_nft_ops) {
> +			const struct nft_chain *c = reg->priv;
> +			struct net *net = read_pnet(&nft_base_chain(c)->pnet);
> +
> +			/* fail if netns already have enabled nft/ipt nat */
> +			if (netns_nat_check(elem, reg->pf, net)) {
> +				mutex_unlock(&nf_hook_mutex);
> +				return -EBUSY;
> +			}
> +		}
>  	}
>  	list_add_rcu(&reg->list, elem->list.prev);
>  	mutex_unlock(&nf_hook_mutex);
>