Always prefer the avx2 implementation if its available. This greatly improves insertion performance (each insertion checks if the new element would overlap with an existing one): time nft -f - < Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 45 +++++++++++++++++++++++++---- net/netfilter/nft_set_pipapo_avx2.c | 8 ++--- net/netfilter/nft_set_pipapo_avx2.h | 4 +++ 3 files changed, 48 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 9a10251228fd..7ed9c5f0e233 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -397,7 +397,7 @@ int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules, } /** - * pipapo_get() - Get matching element reference given key data + * pipapo_get_slow() - Get matching element reference given key data * @m: storage containing the set elements * @data: Key data to be matched against existing elements * @genmask: If set, check that element is active in given genmask @@ -414,9 +414,9 @@ int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules, * * Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise. */ -static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, - const u8 *data, u8 genmask, - u64 tstamp) +static struct nft_pipapo_elem *pipapo_get_slow(const struct nft_pipapo_match *m, + const u8 *data, u8 genmask, + u64 tstamp) { struct nft_pipapo_scratch *scratch; unsigned long *res_map, *fill_map; @@ -502,6 +502,41 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, return NULL; } +/** + * pipapo_get() - Get matching element reference given key data + * @m: Storage containing the set elements + * @data: Key data to be matched against existing elements + * @genmask: If set, check that element is active in given genmask + * @tstamp: Timestamp to check for expired elements + * + * This is a dispatcher function, either calling out the generic C + * implementation or, if available, the AVX2 one. + * This helper is only called from the control plane, with either RCU + * read lock or transaction mutex held. + * + * Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise. + */ +static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, + const u8 *data, u8 genmask, + u64 tstamp) +{ + struct nft_pipapo_elem *e; + + local_bh_disable(); + +#if defined(CONFIG_X86_64) && !defined(CONFIG_UML) + if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX) && + irq_fpu_usable()) { + e = pipapo_get_avx2(m, data, genmask, tstamp); + local_bh_enable(); + return e; + } +#endif + e = pipapo_get_slow(m, data, genmask, tstamp); + local_bh_enable(); + return e; +} + /** * nft_pipapo_lookup() - Dataplane fronted for main lookup function * @net: Network namespace @@ -523,7 +558,7 @@ nft_pipapo_lookup(const struct net *net, const struct nft_set *set, const struct nft_pipapo_elem *e; m = rcu_dereference(priv->match); - e = pipapo_get(m, (const u8 *)key, genmask, get_jiffies_64()); + e = pipapo_get_slow(m, (const u8 *)key, genmask, get_jiffies_64()); return e ? &e->ext : NULL; } diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 994a2ad2d9b1..028c11724b42 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -1149,9 +1149,9 @@ static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, uns * * Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise. */ -static struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m, - const u8 *data, u8 genmask, - u64 tstamp) +struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m, + const u8 *data, u8 genmask, + u64 tstamp) { struct nft_pipapo_scratch *scratch; const struct nft_pipapo_field *f; @@ -1261,7 +1261,7 @@ static struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m, * * This function is called from the data path. It will search for * an element matching the given key in the current active copy using - * the AVX2 routines if the fpu is usable or fall back to the generic + * the AVX2 routines if the FPU is usable or fall back to the generic * implementation of the algorithm otherwise. * * Return: nftables API extension pointer or NULL if no match. diff --git a/net/netfilter/nft_set_pipapo_avx2.h b/net/netfilter/nft_set_pipapo_avx2.h index dbb6aaca8a7a..c2999b63da3f 100644 --- a/net/netfilter/nft_set_pipapo_avx2.h +++ b/net/netfilter/nft_set_pipapo_avx2.h @@ -5,8 +5,12 @@ #include #define NFT_PIPAPO_ALIGN (XSAVE_YMM_SIZE / BITS_PER_BYTE) +struct nft_pipapo_match; bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est); +struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m, + const u8 *data, u8 genmask, + u64 tstamp); #endif /* defined(CONFIG_X86_64) && !defined(CONFIG_UML) */ #endif /* _NFT_SET_PIPAPO_AVX2_H */ -- 2.49.1