Changes in V1: 1) Fix compile issues found by Intel bot. Thank you bot. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202512121401.3NRi0dUf-lkp@intel.com/ 2) Fix other issues found by patchwork - https://patchwork.kernel.org/project/netdevbpf/patch/20251210172554.1071864-1-jhs@mojatatu.com/ 3) The AI reviewer claimed there was an issue but the link was a 404 - https://netdev-ai.bots.linux.dev/ai-review.html?id=23b3f0a5-ca6c-4cd2-962e-34cbf46f9d24 This patch could be broken down into multiple patches (at least two), but posted as one because it is an RFC. When mirred redirects from egress to ingress the loop state is lost. This is because the current loop detection mechanism depends on the device being remembered on the sched_mirred_dev array; however, that array is cleared when we go from egress->ingress because the packet ends up in the backlog and when we restart from the backlog the loop is amplified, on and on... A simple test case: tc qdisc add dev ethx clsact tc qdisc add dev ethy clsact tc filter add dev ethx ingress protocol ip \ prio 10 matchall action mirred egress redirect dev ethy tc filter add dev ethy egress protocol ip \ prio 10 matchall action mirred ingress redirect dev ethx ping such that packets arrive on ethx. Puff and sweat while the CPU consumption goes up. Or just delete those two qdiscs from above on ethx and ethy. For this to work we need to _remember the loop state in the skb_. We reclaim the bit "skb->from_ingress" to the qdisc_skb_cb since its use is constrained to ifb. We then use an extra bit that was available on the skb for a total of 2 "skb->ttl" bits. Mirred increments the ttl whenever it sees the same skb. We then catch it when it exceeds MIRRED_NEST_LIMIT iterations of the loop. 
Signed-off-by: Jamal Hadi Salim --- drivers/net/ifb.c | 3 +-- include/linux/skbuff.h | 26 ++++---------------------- include/net/sch_generic.h | 20 ++++++++++++++++++++ net/netfilter/nft_fwd_netdev.c | 1 + net/sched/act_mirred.c | 5 ++++- 5 files changed, 30 insertions(+), 25 deletions(-) diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index d3dc0914450a..4783d479d1d6 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -123,8 +123,7 @@ static void ifb_ri_tasklet(struct tasklet_struct *t) } rcu_read_unlock(); skb->skb_iif = txp->dev->ifindex; - - if (!skb->from_ingress) { + if (!qdisc_skb_cb(skb)->from_ingress) { dev_queue_xmit(skb); } else { skb_pull_rcsum(skb, skb->mac_len); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 86737076101d..a6df99714a44 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -840,6 +840,7 @@ enum skb_tstamp_type { * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @encapsulation: indicates the inner headers in the skbuff are valid * @encap_hdr_csum: software checksum is needed + * @ttl: time to live count when a packet loops. * @csum_valid: checksum is already valid * @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL * @csum_complete_sw: checksum was completed by software @@ -1000,6 +1001,9 @@ struct sk_buff { /* Indicates the inner headers are valid in the skbuff. 
*/ __u8 encapsulation:1; __u8 encap_hdr_csum:1; +#ifdef CONFIG_NET_REDIRECT + __u8 ttl:2; +#endif __u8 csum_valid:1; #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; @@ -1016,9 +1020,6 @@ struct sk_buff { __u8 offload_l3_fwd_mark:1; #endif __u8 redirected:1; -#ifdef CONFIG_NET_REDIRECT - __u8 from_ingress:1; -#endif #ifdef CONFIG_NETFILTER_SKIP_EGRESS __u8 nf_skip_egress:1; #endif @@ -5352,30 +5353,11 @@ static inline bool skb_is_redirected(const struct sk_buff *skb) return skb->redirected; } -static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress) -{ - skb->redirected = 1; -#ifdef CONFIG_NET_REDIRECT - skb->from_ingress = from_ingress; - if (skb->from_ingress) - skb_clear_tstamp(skb); -#endif -} - static inline void skb_reset_redirect(struct sk_buff *skb) { skb->redirected = 0; } -static inline void skb_set_redirected_noclear(struct sk_buff *skb, - bool from_ingress) -{ - skb->redirected = 1; -#ifdef CONFIG_NET_REDIRECT - skb->from_ingress = from_ingress; -#endif -} - static inline bool skb_csum_is_sctp(struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IP_SCTP) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c3a7268b567e..ee494310165f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -459,6 +459,7 @@ struct qdisc_skb_cb { u8 post_ct:1; u8 post_ct_snat:1; u8 post_ct_dnat:1; + u8 from_ingress:1; }; typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); @@ -1140,6 +1141,25 @@ static inline void qdisc_dequeue_drop(struct Qdisc *q, struct sk_buff *skb, q->to_free = skb; } +static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress) +{ + skb->redirected = 1; +#ifdef CONFIG_NET_REDIRECT + qdisc_skb_cb(skb)->from_ingress = from_ingress; + if (qdisc_skb_cb(skb)->from_ingress) + skb_clear_tstamp(skb); +#endif +} + +static inline void skb_set_redirected_noclear(struct sk_buff *skb, + bool from_ingress) +{ + skb->redirected = 1; +#ifdef CONFIG_NET_REDIRECT + 
qdisc_skb_cb(skb)->from_ingress = from_ingress; +#endif +} + /* Instead of calling kfree_skb() while root qdisc lock is held, * queue the skb for future freeing at end of __dev_xmit_skb() */ diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 152a9fb4d23a..d62c856ef96a 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -16,6 +16,7 @@ #include #include #include +#include struct nft_fwd_netdev { u8 sreg_dev; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 91c96cc625bd..fec5a5763fcb 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -318,8 +318,10 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress); + skb_to_send->ttl++; err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } else { + skb_to_send->ttl++; err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } if (err) @@ -434,7 +436,8 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, #else xmit = this_cpu_ptr(&softnet_data.xmit); #endif - if (unlikely(xmit->sched_mirred_nest >= MIRRED_NEST_LIMIT)) { + + if (skb->ttl >= MIRRED_NEST_LIMIT - 1) { net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", netdev_name(skb->dev)); return TC_ACT_SHOT; -- 2.34.1