[RHEL7,COMMIT] ms/netfilter: nft_rbtree: allow adjacent intervals with dynamic updates

Submitted by Vasily Averin on Dec. 20, 2020, 8:48 a.m.

Details

Message ID 202012200848.0BK8mtLD005013@vz7build.vvs.sw.ru
State New
Series "Series without cover letter"
Headers show

Commit Message

Vasily Averin Dec. 20, 2020, 8:48 a.m.
The commit is pushed to "branch-rh7-3.10.0-1160.11.1.vz7.172.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1160.11.1.vz7.172.1
------>
commit 9502f209cb4a917d632ee02db00688e090656fea
Author: Pablo Neira Ayuso <pablo@netfilter.org>
Date:   Sun Dec 20 11:48:55 2020 +0300

    ms/netfilter: nft_rbtree: allow adjacent intervals with dynamic updates
    
    This patch fixes dynamic element updates for adjacent intervals in the
    rb-tree representation.
    
    Since elements are sorted in the rb-tree, in case of adjacent nodes with
    the same key, the assumption is that an interval end node must be placed
    before an interval opening.
    
    In tree lookup operations, the idea is to search for the closest element
    that is smaller than the one we're searching for. Given that we'll have
    two possible matches, we have to take the opening interval in case of
    adjacent nodes.
    
    Range merges are not trivial with the current representation:
    specifically, we have to check whether node extensions are equal and
    make sure we keep the existing internal states around.
    
    Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
    (cherry-picked from commit e701001e7cbe88cdc937037f6f398669eef7e7ff)
    VvS: minor context changes
    https://jira.sw.ru/browse/PSBM-121318
    Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
---
 net/netfilter/nft_rbtree.c | 40 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 35 insertions(+), 5 deletions(-)

Patch hide | download patch | download mbox

diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index 9ed51e9..ea06aa9 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -35,6 +35,12 @@  static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
 	       (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
 }
 
+static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
+			     const struct nft_rbtree_elem *interval)
+{
+	return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
+}
+
 static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
 			      const struct nft_set_ext **ext)
 {
@@ -42,6 +48,7 @@  static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
 	const struct nft_rbtree_elem *rbe, *interval = NULL;
 	const struct rb_node *parent;
 	u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
+	const void *this;
 	int d;
 
 	spin_lock_bh(&nft_rbtree_lock);
@@ -49,9 +56,16 @@  static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
 	while (parent != NULL) {
 		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
 
-		d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
+		this = nft_set_ext_key(&rbe->ext);
+		d = memcmp(this, key, set->klen);
 		if (d < 0) {
 			parent = parent->rb_left;
+			/* In case of adjacent ranges, we always see the high
+			 * part of the range in first place, before the low one.
+			 * So don't update interval if the keys are equal.
+			 */
+			if (interval && nft_rbtree_equal(set, this, interval))
+				continue;
 			interval = rbe;
 		} else if (d > 0)
 			parent = parent->rb_right;
@@ -103,9 +117,16 @@  static int __nft_rbtree_insert(const struct nft_set *set,
 		else if (d > 0)
 			p = &parent->rb_right;
 		else {
-			if (nft_set_elem_active(&rbe->ext, genmask))
-				return -EEXIST;
-			p = &parent->rb_left;
+			if (nft_set_elem_active(&rbe->ext, genmask)) {
+				if (nft_rbtree_interval_end(rbe) &&
+				    !nft_rbtree_interval_end(new))
+					p = &parent->rb_left;
+				else if (!nft_rbtree_interval_end(rbe) &&
+					 nft_rbtree_interval_end(new))
+					p = &parent->rb_right;
+				else
+					return -EEXIST;
+			}
 		}
 	}
 	rb_link_node(&new->node, parent, p);
@@ -158,7 +179,7 @@  static void *nft_rbtree_deactivate(const struct nft_set *set,
 {
 	const struct nft_rbtree *priv = nft_set_priv(set);
 	const struct rb_node *parent = priv->root.rb_node;
-	struct nft_rbtree_elem *rbe;
+	struct nft_rbtree_elem *rbe, *this = elem->priv;
 	u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
 	int d;
 
@@ -176,6 +197,15 @@  static void *nft_rbtree_deactivate(const struct nft_set *set,
 				parent = parent->rb_left;
 				continue;
 			}
+			if (nft_rbtree_interval_end(rbe) &&
+			    !nft_rbtree_interval_end(this)) {
+				parent = parent->rb_left;
+				continue;
+			} else if (!nft_rbtree_interval_end(rbe) &&
+				   nft_rbtree_interval_end(this)) {
+				parent = parent->rb_right;
+				continue;
+			}
 			nft_rbtree_deactivate_one(set, rbe);
 			return rbe;
 		}