There a two issues with traversal: 1. Key lookup (tree search) cannot detect concurrent modifications and may not find a result in case of parallel modification. 2. Worker does a lockless iteration. This is never safe. Add a sequence counter and re-do the lookup under lock in case the tree was modified / seqcount changed. gc_worker bugs are addressed in the next patch. Signed-off-by: Florian Westphal --- v4: use seqcount_spinlock_t. net/netfilter/nf_conncount.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 56ac64ecfb75..1247cbe77740 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -57,6 +57,7 @@ struct nf_conncount_rb { struct nf_conncount_root { struct rb_root root; spinlock_t lock; + seqcount_spinlock_t count; }; struct nf_conncount_data { @@ -382,8 +383,10 @@ static void tree_nodes_free(struct nf_conncount_root *root, rbconn = gc_nodes[--gc_count]; spin_lock(&rbconn->list.list_lock); if (!rbconn->list.count) { + write_seqcount_begin(&root->count); rb_erase(&rbconn->node, &root->root); call_rcu(&rbconn->rcu_head, __tree_nodes_free); + write_seqcount_end(&root->count); } spin_unlock(&rbconn->list.list_lock); } @@ -478,8 +481,10 @@ insert_tree(struct net *net, count = 1; rbconn->list.count = count; + write_seqcount_begin(&root->count); rb_link_node_rcu(&rbconn->node, parent, rbnode); rb_insert_color(&rbconn->node, &root->root); + write_seqcount_end(&root->count); } out_unlock: if (refcounted) @@ -492,6 +497,7 @@ static struct nf_conncount_rb * find_tree_node(struct nf_conncount_root *root, struct nf_conncount_data *data, const u32 *key) { + unsigned int seq = read_seqcount_begin(&root->count); struct rb_node *parent; parent = rcu_dereference_check(root->root.rb_node, @@ -511,8 +517,14 @@ find_tree_node(struct nf_conncount_root *root, struct nf_conncount_data *data, lockdep_is_held(&root->lock)); else return rbconn; + + if (read_seqcount_retry(&root->count, seq)) + return ERR_PTR(-EAGAIN); } + if (read_seqcount_retry(&root->count, seq)) + return ERR_PTR(-EAGAIN); + return ERR_PTR(-ENOENT); } @@ -533,6 +545,12 @@ count_tree(struct net *net, rbconn = find_tree_node(root, data, key); if (IS_ERR(rbconn)) { + if (PTR_ERR(rbconn) == -EAGAIN) { + spin_lock_bh(&root->lock); + rbconn = find_tree_node(root, data, key); + spin_unlock_bh(&root->lock); + } + if (PTR_ERR(rbconn) == -ENOENT) { if (!skb) return 0; @@ -650,6 +668,7 @@ static void nf_conncount_root_init(struct nf_conncount_root *r) { r->root = RB_ROOT; spin_lock_init(&r->lock); + seqcount_spinlock_init(&r->count, &r->lock); } struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen) -- 2.53.0