There are two issues with traversal: 1. Key lookup (tree search) cannot detect concurrent modifications and may not find a result in case of parallel modification. 2. Worker does a lockless iteration. This is never safe. Add a sequence counter and re-do the lookup under lock in case the tree was modified / seqcount changed. gc_worker bugs are addressed in the next patch. Signed-off-by: Florian Westphal --- v3: return -EAGAIN if no match found but tree was altered. net/netfilter/nf_conncount.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 90d29a8684c0..2183649085b5 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -58,6 +58,7 @@ static spinlock_t nf_conncount_locks[CONNCOUNT_SLOTS] __cacheline_aligned_in_smp struct nf_conncount_root { struct rb_root root; + seqcount_t count; }; struct nf_conncount_data { @@ -382,8 +383,10 @@ static void tree_nodes_free(struct nf_conncount_root *root, rbconn = gc_nodes[--gc_count]; spin_lock(&rbconn->list.list_lock); if (!rbconn->list.count) { + write_seqcount_begin(&root->count); rb_erase(&rbconn->node, &root->root); call_rcu(&rbconn->rcu_head, __tree_nodes_free); + write_seqcount_end(&root->count); } spin_unlock(&rbconn->list.list_lock); } @@ -478,8 +481,10 @@ insert_tree(struct net *net, count = 1; rbconn->list.count = count; + write_seqcount_begin(&root->count); rb_link_node_rcu(&rbconn->node, parent, rbnode); rb_insert_color(&rbconn->node, &root->root); + write_seqcount_end(&root->count); } out_unlock: if (refcounted) @@ -493,6 +498,7 @@ find_tree_node(struct nf_conncount_root *root, struct nf_conncount_data *data, const u32 *key, bool locked) { + unsigned int seq = read_seqcount_begin(&root->count); struct rb_node *parent; parent = rcu_dereference_check(root->root.rb_node, locked); @@ -509,8 +515,14 @@ find_tree_node(struct nf_conncount_root *root, struct nf_conncount_data *data, parent = 
rcu_dereference_check(parent->rb_right, locked); else return rbconn; + + if (read_seqcount_retry(&root->count, seq)) + return ERR_PTR(-EAGAIN); } + if (read_seqcount_retry(&root->count, seq)) + return ERR_PTR(-EAGAIN); + return ERR_PTR(-ENOENT); } @@ -531,6 +543,12 @@ count_tree(struct net *net, rbconn = find_tree_node(root, data, key, false); if (IS_ERR(rbconn)) { + if (PTR_ERR(rbconn) == -EAGAIN) { + spin_lock_bh(&nf_conncount_locks[hash]); + rbconn = find_tree_node(root, data, key, true); + spin_unlock_bh(&nf_conncount_locks[hash]); + } + if (PTR_ERR(rbconn) == -ENOENT) { if (!skb) return 0; @@ -647,6 +665,7 @@ EXPORT_SYMBOL_GPL(nf_conncount_count_skb); static void nf_conncount_root_init(struct nf_conncount_root *r) { r->root = RB_ROOT; + seqcount_init(&r->count); } struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen) -- 2.54.0