Currently, fib_info_hash_bucket() does not account for MPLS labels
(lwtunnel state) when calculating the hash for fib_info objects. This
leads to massive hash collisions when many routes are configured with
the same gateway but different MPLS labels.

To resolve this, introduce a lwtunnel_get_encap_hash() helper which
calls a new .get_encap_hash callback in lwtunnel_encap_ops. Implement
this callback for mpls_iptunnel to provide a hash of the MPLS label
set.

This ensures proper distribution in the fib_info_hash table, improving
route installation and deletion performance by avoiding massive hash
collisions. In a test case with 100,000 MPLS routes, this changes the
algorithmic complexity from an O(N) lookup in a single bucket to a
well-distributed hash table lookup.

Performance test (batch installation of 100,000 routes with MPLS
labels):

CPU: Intel(R) Core(TM) i5-8400 CPU @ 2.80GHz

- Before patch: 6m 0.258s (sys 5m 56.895s)
- After patch:  0m 0.879s (sys 0m 0.468s)

Signed-off-by: Vitaliy Guschin
---
Changes in v2:
- Removed unnecessary nla_total_size() call in the
  lwtunnel_get_encap_hash() logic.
include/net/lwtunnel.h | 7 +++++++ net/core/lwtunnel.c | 22 ++++++++++++++++++++++ net/ipv4/fib_semantics.c | 12 +++++++++++- net/mpls/mpls_iptunnel.c | 13 +++++++++++++ 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 26232f603e33..c91e4d4fa08b 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -47,6 +47,7 @@ struct lwtunnel_encap_ops { int (*fill_encap)(struct sk_buff *skb, struct lwtunnel_state *lwtstate); int (*get_encap_size)(struct lwtunnel_state *lwtstate); + unsigned int (*get_encap_hash)(struct lwtunnel_state *lwtstate); int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b); int (*xmit)(struct sk_buff *skb); @@ -127,6 +128,7 @@ int lwtunnel_build_state(struct net *net, u16 encap_type, int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, int encap_attr, int encap_type_attr); int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate); +unsigned int lwtunnel_get_encap_hash(struct lwtunnel_state *lwtstate); struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len); int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b); int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb); @@ -237,6 +239,11 @@ static inline int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) return 0; } +static inline unsigned int lwtunnel_get_encap_hash(struct lwtunnel_state *lwtstate) +{ + return 0; +} + static inline struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len) { return NULL; diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index f9d76d85d04f..07b01a0c1895 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -289,6 +289,28 @@ int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) } EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size); +unsigned int lwtunnel_get_encap_hash(struct lwtunnel_state *lwtstate) +{ + const struct lwtunnel_encap_ops *ops; + unsigned int hash = 0; + + if (!lwtstate) + 
return 0; + + if (lwtstate->type == LWTUNNEL_ENCAP_NONE || + lwtstate->type > LWTUNNEL_ENCAP_MAX) + return 0; + + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[lwtstate->type]); + if (likely(ops && ops->get_encap_hash)) + hash = ops->get_encap_hash(lwtstate); + rcu_read_unlock(); + + return hash; +} +EXPORT_SYMBOL_GPL(lwtunnel_get_encap_hash); + int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) { const struct lwtunnel_encap_ops *ops; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 0caf38e44c73..775582537561 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -325,6 +325,16 @@ static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, return val; } +static unsigned int fib_info_hashfn_nh(unsigned int val, const struct fib_nh *nh) +{ + val ^= nh->fib_nh_oif; + + if (nh->fib_nh_lws) + val ^= lwtunnel_get_encap_hash(nh->fib_nh_lws); + + return val; +} + static unsigned int fib_info_hashfn_result(const struct net *net, unsigned int val) { @@ -344,7 +354,7 @@ static struct hlist_head *fib_info_hash_bucket(struct fib_info *fi) val ^= fi->nh->id; } else { for_nexthops(fi) { - val ^= nh->fib_nh_oif; + val ^= fib_info_hashfn_nh(val, nh); } endfor_nexthops(fi) } diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 1a1a0eb5b787..0960dfb3d633 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -259,6 +259,18 @@ static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate) return nlsize; } +static unsigned int mpls_encap_hash(struct lwtunnel_state *lwtstate) +{ + struct mpls_iptunnel_encap *tun_encap_info; + unsigned int hash; + + tun_encap_info = mpls_lwtunnel_encap(lwtstate); + + hash = jhash2(tun_encap_info->label, tun_encap_info->labels, 0); + + return hash; +} + static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) { struct mpls_iptunnel_encap *a_hdr = mpls_lwtunnel_encap(a); @@ -281,6 +293,7 @@ static const struct 
lwtunnel_encap_ops mpls_iptun_ops = { .xmit = mpls_xmit, .fill_encap = mpls_fill_encap_info, .get_encap_size = mpls_encap_nlsize, + .get_encap_hash = mpls_encap_hash, .cmp_encap = mpls_encap_cmp, .owner = THIS_MODULE, }; -- 2.53.0