There are two issues with traversal: 1. Key lookup (tree search) cannot detect concurrent modifications and may not find a result in case of parallel modification. 2. Worker does a lockless iteration. This is never safe. Add a sequence counter and re-do the lookup under lock in case the tree was modified / seqcount changed. gc_worker bugs are addressed in the next patch. Signed-off-by: Florian Westphal --- v2: new in this iteration (split from last patch) net/netfilter/nf_conncount.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 8d9c24b69dc9..3c88fb206fb4 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -58,6 +58,7 @@ static spinlock_t nf_conncount_locks[CONNCOUNT_SLOTS] __cacheline_aligned_in_smp struct nf_conncount_root { struct rb_root root; + seqcount_t count; }; struct nf_conncount_data { @@ -382,8 +383,10 @@ static void tree_nodes_free(struct nf_conncount_root *root, rbconn = gc_nodes[--gc_count]; spin_lock(&rbconn->list.list_lock); if (!rbconn->list.count) { + write_seqcount_begin(&root->count); rb_erase(&rbconn->node, &root->root); call_rcu(&rbconn->rcu_head, __tree_nodes_free); + write_seqcount_end(&root->count); } spin_unlock(&rbconn->list.list_lock); } @@ -478,8 +481,10 @@ insert_tree(struct net *net, count = 1; rbconn->list.count = count; + write_seqcount_begin(&root->count); rb_link_node_rcu(&rbconn->node, parent, rbnode); rb_insert_color(&rbconn->node, &root->root); + write_seqcount_end(&root->count); } out_unlock: if (refcounted) @@ -492,6 +497,7 @@ static struct nf_conncount_rb * find_tree_node(struct nf_conncount_root *root, struct nf_conncount_data *data, const u32 *key) { + unsigned int seq = read_seqcount_begin(&root->count); struct rb_node *parent; parent = rcu_dereference_raw(root->root.rb_node); @@ -509,6 +515,9 @@ find_tree_node(struct nf_conncount_root *root, struct nf_conncount_data *data, } else { return rbconn; } + + if 
(read_seqcount_retry(&root->count, seq)) + return ERR_PTR(-EAGAIN); } return ERR_PTR(-ENOENT); @@ -531,6 +540,12 @@ count_tree(struct net *net, rbconn = find_tree_node(root, data, key); if (IS_ERR(rbconn)) { + if (PTR_ERR(rbconn) == -EAGAIN) { + spin_lock_bh(&nf_conncount_locks[hash]); + rbconn = find_tree_node(root, data, key); + spin_unlock_bh(&nf_conncount_locks[hash]); + } + if (PTR_ERR(rbconn) == -ENOENT) { if (!skb) return 0; @@ -647,6 +662,7 @@ EXPORT_SYMBOL_GPL(nf_conncount_count_skb); static void nf_conncount_root_init(struct nf_conncount_root *r) { r->root = RB_ROOT; + seqcount_init(&r->count); } struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen) -- 2.54.0