[Devel,RHEL7,COMMIT] net: Primitives to enable conntrack allocation

Submitted by Konstantin Khorenko on Sept. 13, 2016, 9:09 a.m.

Details

Message ID 201609130909.u8D99u9e016063@finist_cl7.x64_64.work.ct
State New
Series "Create conntrack structures only if they are really needed"
Headers show

Commit Message

Konstantin Khorenko Sept. 13, 2016, 9:09 a.m.
The commit is pushed to "branch-rh7-3.10.0-327.28.2.vz7.17.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.28.2.vz7.17.5
------>
commit f41d267ea23750b13c6c2d5434bc3b9c60bd888e
Author: Kirill Tkhai <ktkhai@virtuozzo.com>
Date:   Tue Sep 13 13:09:56 2016 +0400

    net: Primitives to enable conntrack allocation
    
    Patchset description:
    
    Create conntrack structures only if they are really needed
    
    Allocate conntracks only after there is a rule which uses them.
    
    v2: Allow allocation once there is a rule, and never prohibit it afterwards.
    
    khorenko@: the idea behind all of this:
    we want to let Containers use iptables rules which require conntracks.
    At the same time we'd like to avoid the problems we currently have when
    conntrack allocation is simply enabled for all Containers and the
    Hardware Node by default:
    1) if conntracks are not actually used by a CT, the structures are still
       allocated, which decreases performance
    2) the number of conntracks in the system is limited => DDoS is possible
    
    So we decided to implement a feature:
    do not allocate conntracks until there are rules in the net namespace which
    require them.
    
    Disadvantage: if a user on a live system loads an iptables rule which
    requires conntracks, connections which are already alive may be handled
    less precisely. I believe this is OK.
    
    Once conntracks allocation is enabled, it cannot be disabled until reboot/CT
    restart. This is done in order to:
    a) simplify the code
    b) to have a possibility to unconditionally enable conntracks, for example for
       userspace conntrack users (http://conntrack-tools.netfilter.org/manual.html)
    c) adding a new iptables rule is implemented in the following way:
       - all rules are unloaded
       - new rule is added to the bunch of rules
       - all rules (including the new one) are uploaded to the kernel
       => each added rule would result in conntrack allocation being disabled
       and re-enabled => race window for unhandled connections
    
    =======================
    This patch description:
    
    Allocations are allowed only when there are conntrack users.
    By default they are prohibited.
    
    https://jira.sw.ru/browse/PSBM-51050
    
    Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
    Reviewed-by: Andrei Vagin <avagin@virtuozzo.com>
---
 include/net/net_namespace.h          | 10 ++++++++++
 include/net/netns/conntrack.h        |  1 +
 net/netfilter/nf_conntrack_core.c    |  9 ++++++++-
 net/netfilter/nf_conntrack_netlink.c |  1 +
 net/netfilter/nf_synproxy_core.c     |  1 +
 5 files changed, 21 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 866ee80..2baa2df 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -299,6 +299,16 @@  static inline struct net *read_pnet(possible_net_t const *pnet)
 #define __net_initconst	__initconst
 #endif
 
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+static inline void allow_conntrack_allocation(struct net *net)
+{
+	net->ct.can_alloc = true;
+	smp_wmb(); /* Pairs with rmb in __nf_conntrack_alloc() */
+}
+#else
+static inline void allow_conntrack_allocation(struct net *net) { }
+#endif
+
 int peernet2id_alloc(struct net *net, struct net *peer);
 int peernet2id(struct net *net, struct net *peer);
 bool peernet_has_id(struct net *net, struct net *peer);
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 4d7de37..0c2a685 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -74,6 +74,7 @@  struct ct_pcpu {
 
 struct netns_ct {
 	atomic_t		count;
+	bool			can_alloc;
 	unsigned int		max;
 	unsigned int		expect_count;
 	unsigned int		expect_max;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0c94c3a..15e8479 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -841,6 +841,12 @@  __nf_conntrack_alloc(struct net *net, u16 zone,
 	unsigned int ct_max = net->ct.max ? net->ct.max : init_net.ct.max;
 	struct nf_conn *ct;
 
+	if (!net->ct.can_alloc) {
+		/* No rules loaded */
+		return NULL;
+	}
+	smp_rmb(); /* Pairs with wmb in allow_conntrack_allocation() */
+
 	if (unlikely(!nf_conntrack_hash_rnd)) {
 		init_nf_conntrack_hash_rnd();
 		/* recompute the hash as nf_conntrack_hash_rnd is initialized */
@@ -963,7 +969,7 @@  init_conntrack(struct net *net, struct nf_conn *tmpl,
 
 	ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
 				  hash);
-	if (IS_ERR(ct))
+	if (IS_ERR_OR_NULL(ct))
 		return (struct nf_conntrack_tuple_hash *)ct;
 
 	if (tmpl && nfct_synproxy(tmpl)) {
@@ -1816,6 +1822,7 @@  int nf_conntrack_init_net(struct net *net)
 	int cpu;
 
 	atomic_set(&net->ct.count, 0);
+	net->ct.can_alloc = false;
 	net->ct.max = init_net.ct.max;
 	seqcount_init(&net->ct.generation);
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d6b6465..aad05a0 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1617,6 +1617,7 @@  ctnetlink_create_conntrack(struct net *net, u16 zone,
 	struct nf_conntrack_helper *helper;
 	struct nf_conn_tstamp *tstamp;
 
+	allow_conntrack_allocation(net);
 	ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC);
 	if (IS_ERR(ct))
 		return ERR_PTR(-ENOMEM);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 52e20c9..779e5a6 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -353,6 +353,7 @@  static int __net_init synproxy_net_init(struct net *net)
 	int err = -ENOMEM;
 
 	memset(&t, 0, sizeof(t));
+	allow_conntrack_allocation(net);
 	ct = nf_conntrack_alloc(net, 0, &t, &t, GFP_KERNEL);
 	if (IS_ERR(ct)) {
 		err = PTR_ERR(ct);