The existing partial overlap detection does not check if the elements
belong to the same interval, e.g.

 add element inet x y { 1.1.1.1-2.2.2.2, 4.4.4.4-5.5.5.5 }
 add element inet x y { 1.1.1.1-5.5.5.5 }

=> this should fail: ENOENT

A similar situation occurs with deletions:

 add element inet x y { 1.1.1.1-2.2.2.2, 4.4.4.4-5.5.5.5 }
 delete element inet x y { 1.1.1.1-5.5.5.5 }

=> this should fail: ENOENT

This is currently mitigated by nft in userspace, which performs the
overlap detection before sending the elements to the kernel. This slows
down incremental updates on interval sets because it requires a previous
netlink dump of the set content.

This patch extends the existing overlap detection to track the most
recent start element that already exists. The pointer to this existing
start element is stored as a cookie: it is only ever compared, never
dereferenced. If the end element being added already exists, check that
this existing end element is adjacent to the already existing start
element. Similar logic applies to element deactivation.

There are still a few more corner cases of overlap detection related to
open intervals, these are addressed in follow-up patches.

Fixes: 7c84d41416d8 ("netfilter: nft_set_rbtree: Detect partial overlaps on insertion")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_set_rbtree.c | 127 ++++++++++++++++++++++++++++++++-
 1 file changed, 126 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 0581184cacf9..6580b8e2ec25 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -33,6 +33,7 @@ struct nft_rbtree {
 	rwlock_t		lock;
 	struct nft_array __rcu	*array;
 	struct nft_array	*array_next;
+	unsigned long		start_rbe_cookie;
 	unsigned long		last_gc;
 };
 
@@ -278,6 +279,67 @@ static struct nft_rbtree_elem *nft_rbtree_prev_active(struct nft_rbtree_elem *rb
 	return rb_entry(node, struct nft_rbtree_elem, node);
 }
 
+static struct nft_rbtree_elem *
+__nft_rbtree_next_active(struct rb_node *node, u8 genmask)
+{
+	struct nft_rbtree_elem *next_rbe;
+
+	while (node) {
+		next_rbe = rb_entry(node, struct nft_rbtree_elem, node);
+		if (!nft_set_elem_active(&next_rbe->ext, genmask)) {
+			node = rb_next(node);
+			continue;
+		}
+
+		return next_rbe;
+	}
+
+	return NULL;
+}
+
+static struct nft_rbtree_elem *
+nft_rbtree_next_active(struct nft_rbtree_elem *rbe, u8 genmask)
+{
+	return __nft_rbtree_next_active(rb_next(&rbe->node), genmask);
+}
+
+static void nft_rbtree_set_start_cookie(struct nft_rbtree *priv,
+					const struct nft_rbtree_elem *rbe)
+{
+	priv->start_rbe_cookie = (unsigned long)rbe;
+}
+
+static bool nft_rbtree_cmp_start_cookie(struct nft_rbtree *priv,
+					const struct nft_rbtree_elem *rbe)
+{
+	return priv->start_rbe_cookie == (unsigned long)rbe;
+}
+
+static bool nft_rbtree_insert_same_interval(const struct net *net,
+					    struct nft_rbtree *priv,
+					    struct nft_rbtree_elem *rbe)
+{
+	u8 genmask = nft_genmask_next(net);
+	struct nft_rbtree_elem *next_rbe;
+
+	if (!priv->start_rbe_cookie)
+		return true;
+
+	next_rbe = nft_rbtree_next_active(rbe, genmask);
+	if (next_rbe) {
+		/* Closest start element differs from last element added. */
+		if (nft_rbtree_interval_start(next_rbe) &&
+		    nft_rbtree_cmp_start_cookie(priv, next_rbe)) {
+			priv->start_rbe_cookie = 0;
+			return true;
+		}
+	}
+
+	priv->start_rbe_cookie = 0;
+
+	return false;
+}
+
 static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
 			       struct nft_rbtree_elem *new,
 			       struct nft_elem_priv **elem_priv)
@@ -393,12 +455,18 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
 		}
 	}
 
+	if (nft_rbtree_interval_null(set, new))
+		priv->start_rbe_cookie = 0;
+	else if (nft_rbtree_interval_start(new) && priv->start_rbe_cookie)
+		priv->start_rbe_cookie = 0;
+
 	/* - new start element matching existing start element: full overlap
 	 *   reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
 	 */
 	if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) &&
 	    nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) {
 		*elem_priv = &rbe_ge->priv;
+		nft_rbtree_set_start_cookie(priv, rbe_ge);
 		return -EEXIST;
 	}
 
@@ -414,6 +482,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
 			return -ECANCELED;
 
 		*elem_priv = &rbe_le->priv;
+
+		/* - start and end element belong to the same interval. */
+		if (!nft_rbtree_insert_same_interval(net, priv, rbe_le))
+			return -ENOTEMPTY;
+
 		return -EEXIST;
 	}
 
@@ -603,6 +676,48 @@ static void nft_rbtree_activate(const struct net *net,
 	nft_clear(net, &rbe->ext);
 }
 
+static struct nft_rbtree_elem *
+nft_rbtree_next_inactive(struct nft_rbtree_elem *rbe, u8 genmask)
+{
+	struct nft_rbtree_elem *next_rbe;
+	struct rb_node *node;
+
+	node = rb_next(&rbe->node);
+	if (node) {
+		next_rbe = rb_entry(node, struct nft_rbtree_elem, node);
+		if (nft_rbtree_interval_start(next_rbe) &&
+		    !nft_set_elem_active(&next_rbe->ext, genmask))
+			return next_rbe;
+	}
+
+	return NULL;
+}
+
+static bool nft_rbtree_deactivate_same_interval(const struct net *net,
+						struct nft_rbtree *priv,
+						struct nft_rbtree_elem *rbe)
+{
+	u8 genmask = nft_genmask_next(net);
+	struct nft_rbtree_elem *next_rbe;
+
+	if (!priv->start_rbe_cookie)
+		return true;
+
+	next_rbe = nft_rbtree_next_inactive(rbe, genmask);
+	if (next_rbe) {
+		/* Closest start element differs from last element added. */
+		if (nft_rbtree_interval_start(next_rbe) &&
+		    nft_rbtree_cmp_start_cookie(priv, next_rbe)) {
+			priv->start_rbe_cookie = 0;
+			return true;
+		}
+	}
+
+	priv->start_rbe_cookie = 0;
+
+	return false;
+}
+
 static void nft_rbtree_flush(const struct net *net,
 			     const struct nft_set *set,
 			     struct nft_elem_priv *elem_priv)
@@ -617,12 +732,16 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
 		      const struct nft_set_elem *elem)
 {
 	struct nft_rbtree_elem *rbe, *this = nft_elem_priv_cast(elem->priv);
-	const struct nft_rbtree *priv = nft_set_priv(set);
+	struct nft_rbtree *priv = nft_set_priv(set);
 	const struct rb_node *parent = priv->root.rb_node;
 	u8 genmask = nft_genmask_next(net);
 	u64 tstamp = nft_net_tstamp(net);
 	int d;
 
+	if (nft_rbtree_interval_start(this) ||
+	    nft_rbtree_interval_null(set, this))
+		priv->start_rbe_cookie = 0;
+
 	if (nft_array_may_resize(set) < 0)
 		return NULL;
 
@@ -650,6 +769,12 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
 				parent = parent->rb_left;
 				continue;
 			}
+
+			if (nft_rbtree_interval_start(rbe))
+				nft_rbtree_set_start_cookie(priv, rbe);
+			else if (!nft_rbtree_deactivate_same_interval(net, priv, rbe))
+				return NULL;
+
 			nft_rbtree_flush(net, set, &rbe->priv);
 			return &rbe->priv;
 		}
-- 
2.47.3
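For reference, a minimal standalone userspace sketch of the cookie pattern
the patch relies on: the identity of the last matching start element is
stored as an unsigned long so it can only ever be compared, never
dereferenced. All names and types below are illustrative only, not kernel
API.

/* cookie_sketch.c - illustrative only, not part of the patch. */
#include <stdbool.h>
#include <stdio.h>

struct elem {
	int key;
	bool is_start;
};

struct overlap_state {
	/* Identity of the last matching start element, stored by value. */
	unsigned long start_cookie;
};

static void set_start_cookie(struct overlap_state *s, const struct elem *e)
{
	s->start_cookie = (unsigned long)e;	/* store identity only */
}

static bool cmp_start_cookie(const struct overlap_state *s,
			     const struct elem *e)
{
	return s->start_cookie == (unsigned long)e;	/* compare, never deref */
}

int main(void)
{
	struct elem start_a = { .key = 1, .is_start = true };
	struct elem start_b = { .key = 4, .is_start = true };
	struct overlap_state s = { 0 };

	/* Start element matched an existing one: remember its identity. */
	set_start_cookie(&s, &start_a);

	/* On the matching end element, ask whether the closest start
	 * element is the one previously recorded, i.e. whether start and
	 * end belong to the same interval.
	 */
	printf("same interval: %d\n", cmp_start_cookie(&s, &start_a));	/* 1 */
	printf("other interval: %d\n", cmp_start_cookie(&s, &start_b));	/* 0 */
	return 0;
}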