xfrm_trans_queue() queues transport-mode packets for async reinject via xfrm_trans_reinject() workqueue. After async crypto completes, xfrm_input_resume() re-enters xfrm_input() with encap_type == -1, which immediately calls dev_put(skb->dev) before the skb reaches transport_finish and the reinject queue. The device can be freed before the workqueue callback runs, causing a use-after-free when xfrm_trans_reinject dereferences skb->dev. Remove the dev_put from the async resumption entry and let the reference survive through the transport reinject path. Introduce async variants of the NF_HOOK okfn callbacks that queue the skb with dev_held=true and drop the reference on error. The reinject worker checks this flag and puts the reference after the callback completes. For the synchronous crypto path, the existing dev_hold/dev_put around x->type->input() is unchanged — the reference is balanced within the same softirq context before the skb reaches the queue. If the loop re-enters async crypto (multi-SPI with a second -EINPROGRESS), drop the extra reference from the earlier async resume so exactly one reference accompanies the skb. 
Fixes: acf568ee859f ("xfrm: Reinject transport-mode packets through tasklet") Cc: stable@vger.kernel.org Signed-off-by: Qi Tang <tpluszz77@gmail.com> --- Changes in v2: - Do not add extra dev_hold/dev_put pair (reviewer feedback: "expensive operation, we just drop it too early") - Reuse existing dev_hold from xfrm_input, delay dev_put to reinject completion - Add async okfn variants for IPv4/IPv6 transport_finish so the reinject queue knows whether a dev ref is held - Drop the cb->dev field from v1; use bool dev_held flag instead Link: https://lore.kernel.org/all/20260320073023.21873-1-tpluszz77@gmail.com/ --- include/net/xfrm.h | 3 ++- net/ipv4/esp4.c | 3 ++- net/ipv4/xfrm4_input.c | 25 ++++++++++++++++++++++++- net/ipv6/esp6.c | 3 ++- net/ipv6/xfrm6_input.c | 16 +++++++++++++++- net/xfrm/xfrm_input.c | 35 ++++++++++++++++++++++++++--------- 6 files changed, 71 insertions(+), 14 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 10d3edde6b2f..1dd8b3b36649 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1779,7 +1779,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm_input_resume(struct sk_buff *skb, int nexthdr); int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb, int (*finish)(struct net *, struct sock *, - struct sk_buff *)); + struct sk_buff *), + bool dev_held); int xfrm_trans_queue(struct sk_buff *skb, int (*finish)(struct net *, struct sock *, struct sk_buff *)); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 6dfc0bcdef65..0114c92b10d4 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -187,7 +187,8 @@ static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) int err; local_bh_disable(); - err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb); + err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb, + false); local_bh_enable(); /* EINPROGRESS just happens to do the right thing. 
It diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index f28cfd88eaf5..9765fdc63ffc 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -46,6 +46,28 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, return NET_RX_DROP; } +static int xfrm4_rcv_encap_finish_async(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + if (!skb_dst(skb)) { + const struct iphdr *iph = ip_hdr(skb); + + if (ip_route_input_noref(skb, iph->daddr, iph->saddr, + ip4h_dscp(iph), skb->dev)) + goto drop; + } + + if (xfrm_trans_queue_net(dev_net(skb->dev), skb, + xfrm4_rcv_encap_finish2, true)) + goto drop; + + return 0; +drop: + dev_put(skb->dev); + kfree_skb(skb); + return NET_RX_DROP; +} + int xfrm4_transport_finish(struct sk_buff *skb, int async) { struct xfrm_offload *xo = xfrm_offload(skb); @@ -74,7 +96,8 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, - xfrm4_rcv_encap_finish); + async ? xfrm4_rcv_encap_finish_async : + xfrm4_rcv_encap_finish); return 0; } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9f75313734f8..8a0a44d7d010 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -204,7 +204,8 @@ static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) int err; local_bh_disable(); - err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb); + err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb, + false); local_bh_enable(); /* EINPROGRESS just happens to do the right thing. 
It diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 9005fc156a20..d4eede5315ac 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -40,6 +40,19 @@ static int xfrm6_transport_finish2(struct net *net, struct sock *sk, return 0; } +static int xfrm6_transport_finish2_async(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + if (xfrm_trans_queue_net(dev_net(skb->dev), skb, ip6_rcv_finish, + true)) { + dev_put(skb->dev); + kfree_skb(skb); + return NET_RX_DROP; + } + + return 0; +} + int xfrm6_transport_finish(struct sk_buff *skb, int async) { struct xfrm_offload *xo = xfrm_offload(skb); @@ -69,7 +82,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, - xfrm6_transport_finish2); + async ? xfrm6_transport_finish2_async : + xfrm6_transport_finish2); return 0; } diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index dc1312ed5a09..2d75f984532a 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -40,6 +40,7 @@ struct xfrm_trans_cb { } header; int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb); struct net *net; + bool dev_held; }; #define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0])) @@ -506,7 +507,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) /* An encap_type of -1 indicates async resumption. 
*/ if (encap_type == -1) { async = 1; - dev_put(skb->dev); seq = XFRM_SKB_CB(skb)->seq.input.low; spin_lock(&x->lock); goto resume; @@ -659,8 +659,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) dev_hold(skb->dev); nexthdr = x->type->input(x, skb); - if (nexthdr == -EINPROGRESS) + if (nexthdr == -EINPROGRESS) { + if (async) + dev_put(skb->dev); return 0; + } dev_put(skb->dev); spin_lock(&x->lock); @@ -695,9 +698,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) XFRM_MODE_SKB_CB(skb)->protocol = nexthdr; err = xfrm_inner_mode_input(x, skb); - if (err == -EINPROGRESS) + if (err == -EINPROGRESS) { + if (async) + dev_put(skb->dev); return 0; - else if (err) { + } else if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } @@ -734,6 +739,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) sp->olen = 0; if (skb_valid_dst(skb)) skb_dst_drop(skb); + if (async) + dev_put(skb->dev); gro_cells_receive(&gro_cells, skb); return 0; } else { @@ -753,6 +760,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) sp->olen = 0; if (skb_valid_dst(skb)) skb_dst_drop(skb); + if (async) + dev_put(skb->dev); gro_cells_receive(&gro_cells, skb); return err; } @@ -763,6 +772,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) drop_unlock: spin_unlock(&x->lock); drop: + if (async) + dev_put(skb->dev); xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1); kfree_skb(skb); return 0; @@ -787,15 +798,20 @@ static void xfrm_trans_reinject(struct work_struct *work) spin_unlock_bh(&trans->queue_lock); local_bh_disable(); - while ((skb = __skb_dequeue(&queue))) - XFRM_TRANS_SKB_CB(skb)->finish(XFRM_TRANS_SKB_CB(skb)->net, - NULL, skb); + while ((skb = __skb_dequeue(&queue))) { + struct xfrm_trans_cb *cb = XFRM_TRANS_SKB_CB(skb); + struct net_device *dev = cb->dev_held ? 
skb->dev : NULL; + + cb->finish(cb->net, NULL, skb); + dev_put(dev); + } local_bh_enable(); } int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb, int (*finish)(struct net *, struct sock *, - struct sk_buff *)) + struct sk_buff *), + bool dev_held) { struct xfrm_trans_tasklet *trans; @@ -808,6 +824,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb, XFRM_TRANS_SKB_CB(skb)->finish = finish; XFRM_TRANS_SKB_CB(skb)->net = net; + XFRM_TRANS_SKB_CB(skb)->dev_held = dev_held; spin_lock_bh(&trans->queue_lock); __skb_queue_tail(&trans->queue, skb); spin_unlock_bh(&trans->queue_lock); @@ -820,7 +837,7 @@ int xfrm_trans_queue(struct sk_buff *skb, int (*finish)(struct net *, struct sock *, struct sk_buff *)) { - return xfrm_trans_queue_net(dev_net(skb->dev), skb, finish); + return xfrm_trans_queue_net(dev_net(skb->dev), skb, finish, false); } EXPORT_SYMBOL(xfrm_trans_queue); -- 2.43.0