From: Ruide Cao Hash ipset resize can replay concurrent kernel-side updates after moving entries into a new table. For sets with comments enabled, the resize path copied element storage as-is, which let old and new tables temporarily share comment extension state. When a concurrent delete or timeout cleanup released the old-table entry before replay completed, later processing could operate on already released comment state and trigger warnings or crashes in debug configurations. Fix this by cloning comment extensions while migrating entries so resized tables own independent comment state, and make sure old resize tables are destroyed with extension cleanup once they are no longer referenced. Fixes: f66ee0410b1c ("netfilter: ipset: Fix \"INFO: rcu detected stall in hash_xxx\" reports") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Ruide Cao Tested-by: Ren Wei Signed-off-by: Ren Wei --- include/linux/netfilter/ipset/ip_set.h | 2 ++ net/netfilter/ipset/ip_set_core.c | 25 +++++++++++++++++++++++++ net/netfilter/ipset/ip_set_hash_gen.h | 19 +++++++++++++------ 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index b98331572ad2..5722b36e95a1 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -501,6 +501,8 @@ ip_set_timeout_set(unsigned long *timeout, u32 value) void ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment, const struct ip_set_ext *ext); +int ip_set_clone_comment(struct ip_set *set, struct ip_set_comment *dst, + const struct ip_set_comment *src); static inline void ip_set_init_counter(struct ip_set_counter *counter, diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index c5a26236a0bb..d3e5358f1780 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -367,6 +367,31 @@ ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment, } EXPORT_SYMBOL_GPL(ip_set_init_comment); +int +ip_set_clone_comment(struct ip_set *set, struct ip_set_comment *dst, + const struct ip_set_comment *src) +{ + struct ip_set_comment_rcu *c, *newc; + size_t len; + + c = rcu_dereference_bh(src->c); + RCU_INIT_POINTER(dst->c, NULL); + if (!c) + return 0; + + len = strlen(c->str); + newc = kmalloc(sizeof(*newc) + len + 1, GFP_ATOMIC); + if (!newc) + return -ENOMEM; + + memcpy(newc->str, c->str, len + 1); + set->ext_size += sizeof(*newc) + len + 1; + rcu_assign_pointer(dst->c, newc); + + return 0; +} +EXPORT_SYMBOL_GPL(ip_set_clone_comment); + /* Used only when dumping a set, protected by rcu_read_lock() */ static int ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index b79e5dd2af03..17331bcbd4ec 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -584,7 +584,7 @@ mtype_gc(struct work_struct *work) if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { pr_debug("Table destroy after resize by expire: %p\n", t); - mtype_ahash_destroy(set, t, false); + mtype_ahash_destroy(set, t, true); } queue_delayed_work(system_power_efficient_wq, &gc->dwork, next_run); @@ -743,6 +743,13 @@ mtype_resize(struct ip_set *set, bool retried) } d = ahash_data(m, m->pos, dsize); memcpy(d, data, dsize); + if (SET_WITH_COMMENT(set)) { + ret = ip_set_clone_comment(set, + ext_comment(d, set), + ext_comment(data, set)); + if (ret) + goto cleanup; + } set_bit(m->pos++, m->used); t->hregion[nr].elements++; #ifdef IP_SET_HASH_WITH_NETS @@ -778,7 +785,7 @@ mtype_resize(struct ip_set *set, bool retried) /* If there's nobody else using the table, destroy it */ if (atomic_dec_and_test(&orig->uref)) { pr_debug("Table destroy by resize %p\n", orig); - mtype_ahash_destroy(set, orig, false); + mtype_ahash_destroy(set, orig, true); } out: @@ -791,7 +798,7 @@ mtype_resize(struct ip_set *set, bool retried) rcu_read_unlock_bh(); atomic_set(&orig->ref, 0); atomic_dec(&orig->uref); - mtype_ahash_destroy(set, t, false); + mtype_ahash_destroy(set, t, true); if (ret == -EAGAIN) goto retry; goto out; @@ -1023,7 +1030,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, out: if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { pr_debug("Table destroy after resize by add: %p\n", t); - mtype_ahash_destroy(set, t, false); + mtype_ahash_destroy(set, t, true); } return ret; } @@ -1135,7 +1142,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, } if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { pr_debug("Table destroy after resize by del: %p\n", t); - mtype_ahash_destroy(set, t, false); + mtype_ahash_destroy(set, t, true); } return ret; } @@ -1341,7 +1348,7 @@ mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start) if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { pr_debug("Table destroy after resize " " by dump: %p\n", t); - mtype_ahash_destroy(set, t, false); + mtype_ahash_destroy(set, t, true); } cb->args[IPSET_CB_PRIVATE] = 0; } -- 2.34.1