Going forward skb_dst_set will assert that skb dst_entry is empty during skb_dst_set to prevent potential leaks. There are few places that still manually manage dst_entry not using the helpers. Convert them to the following new helpers: - skb_dstref_steal that resets dst_entry and returns previous dst_entry value - skb_dstref_restore that restores dst_entry previously reset via skb_dstref_steal Signed-off-by: Stanislav Fomichev --- include/linux/skbuff.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 14b923ddb6df..7538ca507ee9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1159,6 +1159,38 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK); } +/** + * skb_dstref_steal() - return current dst_entry value and clear it + * @skb: buffer + * + * Resets skb dst_entry without adjusting its reference count. Useful in + * cases where dst_entry needs to be temporarily reset and restored. + * Note that the returned value cannot be used directly because it + * might contain SKB_DST_NOREF bit. + * + * When in doubt, prefer skb_dst_drop() over skb_dstref_steal() to correctly + * handle dst_entry reference counting. + * + * Returns: original skb dst_entry. + */ +static inline unsigned long skb_dstref_steal(struct sk_buff *skb) +{ + unsigned long refdst = skb->_skb_refdst; + + skb->_skb_refdst = 0; + return refdst; +} + +/** + * skb_dstref_restore() - restore skb dst_entry removed via skb_dstref_steal() + * @skb: buffer + * @refdst: dst entry from a call to skb_dstref_steal() + */ +static inline void skb_dstref_restore(struct sk_buff *skb, unsigned long refdst) +{ + skb->_skb_refdst = refdst; +} + /** * skb_dst_set - sets skb dst * @skb: buffer -- 2.50.1 Going forward skb_dst_set will assert that skb dst_entry is empty during skb_dst_set. skb_dstref_steal is added to reset existing entry without doing refcnt. Switch to skb_dstref_steal in __xfrm_route_forward and add a comment on why it's safe to skip skb_dstref_restore. Signed-off-by: Stanislav Fomichev --- net/xfrm/xfrm_policy.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c5035a9bc3bb..7111184eef59 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3881,12 +3881,18 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) } skb_dst_force(skb); - if (!skb_dst(skb)) { + dst = skb_dst(skb); + if (!dst) { XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); return 0; } - dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE); + /* ignore return value from skb_dstref_steal, xfrm_lookup takes + * care of dropping the refcnt if needed. + */ + skb_dstref_steal(skb); + + dst = xfrm_lookup(net, dst, &fl, NULL, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) { res = 0; dst = NULL; -- 2.50.1 Going forward skb_dst_set will assert that skb dst_entry is empty during skb_dst_set. skb_dstref_steal is added to reset existing entry without doing refcnt. Switch to skb_dstref_steal in ip[6]_route_me_harder and add a comment on why it's safe to skip skb_dstref_restore. Acked-by: Florian Westphal Signed-off-by: Stanislav Fomichev --- net/ipv4/netfilter.c | 5 ++++- net/ipv6/netfilter.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 0565f001120d..e60e54e7945d 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -65,7 +65,10 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && xfrm_decode_session(net, skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { struct dst_entry *dst = skb_dst(skb); - skb_dst_set(skb, NULL); + /* ignore return value from skb_dstref_steal, xfrm_lookup takes + * care of dropping the refcnt if needed. + */ + skb_dstref_steal(skb); dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0); if (IS_ERR(dst)) return PTR_ERR(dst); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 45f9105f9ac1..46540a5a4331 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -63,7 +63,10 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && xfrm_decode_session(net, skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) { - skb_dst_set(skb, NULL); + /* ignore return value from skb_dstref_steal, xfrm_lookup takes + * care of dropping the refcnt if needed. + */ + skb_dstref_steal(skb); dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0); if (IS_ERR(dst)) return PTR_ERR(dst); -- 2.50.1 Going forward skb_dst_set will assert that skb dst_entry is empty during skb_dst_set. skb_dstref_steal is added to reset existing entry without doing refcnt. skb_dstref_restore should be used to restore the previous entry. Convert icmp_route_lookup and ip_options_rcv_srr to these helpers. Add extra call to skb_dstref_reset to icmp_route_lookup to clear the ip_route_input entry. Signed-off-by: Stanislav Fomichev --- net/ipv4/icmp.c | 7 ++++--- net/ipv4/ip_options.c | 5 ++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 2ffe73ea644f..91765057aa1d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -544,14 +544,15 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, goto relookup_failed; } /* Ugh! */ - orefdst = skb_in->_skb_refdst; /* save old refdst */ - skb_dst_set(skb_in, NULL); + orefdst = skb_dstref_steal(skb_in); err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr, dscp, rt2->dst.dev) ? -EINVAL : 0; dst_release(&rt2->dst); rt2 = skb_rtable(skb_in); - skb_in->_skb_refdst = orefdst; /* restore old refdst */ + /* steal dst entry from skb_in, don't drop refcnt */ + skb_dstref_steal(skb_in); + skb_dstref_restore(skb_in, orefdst); } if (err) diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index e3321932bec0..be8815ce3ac2 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -615,14 +615,13 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev) } memcpy(&nexthop, &optptr[srrptr-1], 4); - orefdst = skb->_skb_refdst; - skb_dst_set(skb, NULL); + orefdst = skb_dstref_steal(skb); err = ip_route_input(skb, nexthop, iph->saddr, ip4h_dscp(iph), dev) ? -EINVAL : 0; rt2 = skb_rtable(skb); if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { skb_dst_drop(skb); - skb->_skb_refdst = orefdst; + skb_dstref_restore(skb, orefdst); return -EINVAL; } refdst_drop(orefdst); -- 2.50.1 Instead of doing dst_release and skb_dst_set, do skb_dst_drop which should do the right thing. Acked-by: Greg Kroah-Hartman Signed-off-by: Stanislav Fomichev --- drivers/staging/octeon/ethernet-tx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c index 261f8dbdc382..0ba240e634a1 100644 --- a/drivers/staging/octeon/ethernet-tx.c +++ b/drivers/staging/octeon/ethernet-tx.c @@ -346,8 +346,7 @@ netdev_tx_t cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev) * The skbuff will be reused without ever being freed. We must * cleanup a bunch of core things. */ - dst_release(skb_dst(skb)); - skb_dst_set(skb, NULL); + skb_dst_drop(skb); skb_ext_reset(skb); nf_reset_ct(skb); skb_reset_redirect(skb); -- 2.50.1 Going forward skb_dst_set will assert that skb dst_entry is empty during skb_dst_set. skb_dstref_steal is added to reset existing entry without doing refcnt. Chelsio driver is doing extra dst management via skb_dst_set(NULL). Replace these calls with skb_dstref_steal. Signed-off-by: Stanislav Fomichev --- .../ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 10 +++++----- .../ethernet/chelsio/inline_crypto/chtls/chtls_cm.h | 4 ++-- .../ethernet/chelsio/inline_crypto/chtls/chtls_io.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 6f6525983130..2e7c2691a193 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -171,7 +171,7 @@ static void chtls_purge_receive_queue(struct sock *sk) struct sk_buff *skb; while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { - skb_dst_set(skb, (void *)NULL); + skb_dstref_steal(skb); kfree_skb(skb); } } @@ -194,7 +194,7 @@ static void chtls_purge_recv_queue(struct sock *sk) struct sk_buff *skb; while ((skb = __skb_dequeue(&tlsk->sk_recv_queue)) != NULL) { - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); kfree_skb(skb); } } @@ -1734,7 +1734,7 @@ static int chtls_rx_data(struct chtls_dev *cdev, struct sk_buff *skb) pr_err("can't find conn. for hwtid %u.\n", hwtid); return -EINVAL; } - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); process_cpl_msg(chtls_recv_data, sk, skb); return 0; } @@ -1786,7 +1786,7 @@ static int chtls_rx_pdu(struct chtls_dev *cdev, struct sk_buff *skb) pr_err("can't find conn. for hwtid %u.\n", hwtid); return -EINVAL; } - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); process_cpl_msg(chtls_recv_pdu, sk, skb); return 0; } @@ -1855,7 +1855,7 @@ static int chtls_rx_cmp(struct chtls_dev *cdev, struct sk_buff *skb) pr_err("can't find conn. for hwtid %u.\n", hwtid); return -EINVAL; } - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); process_cpl_msg(chtls_rx_hdr, sk, skb); return 0; diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h index f61ca657601c..2285cf2df251 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h @@ -171,14 +171,14 @@ static inline void chtls_set_req_addr(struct request_sock *oreq, static inline void chtls_free_skb(struct sock *sk, struct sk_buff *skb) { - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); __skb_unlink(skb, &sk->sk_receive_queue); __kfree_skb(skb); } static inline void chtls_kfree_skb(struct sock *sk, struct sk_buff *skb) { - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); __skb_unlink(skb, &sk->sk_receive_queue); kfree_skb(skb); } diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c index 465fa8077964..4036db466e18 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -1434,7 +1434,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, continue; found_ok_skb: if (!skb->len) { - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); __skb_unlink(skb, &sk->sk_receive_queue); kfree_skb(skb); -- 2.50.1 To prevent dst_entry leaks, add warning when the non-NULL dst_entry is rewritten. Signed-off-by: Stanislav Fomichev --- include/linux/skbuff.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7538ca507ee9..ca8be45dd8be 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1159,6 +1159,12 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK); } +static inline void skb_dst_check_unset(struct sk_buff *skb) +{ + DEBUG_NET_WARN_ON_ONCE((skb->_skb_refdst & SKB_DST_PTRMASK) && + !(skb->_skb_refdst & SKB_DST_NOREF)); +} + /** * skb_dstref_steal() - return current dst_entry value and clear it * @skb: buffer @@ -1188,6 +1194,7 @@ static inline unsigned long skb_dstref_steal(struct sk_buff *skb) */ static inline void skb_dstref_restore(struct sk_buff *skb, unsigned long refdst) { + skb_dst_check_unset(skb); skb->_skb_refdst = refdst; } @@ -1201,6 +1208,7 @@ static inline void skb_dstref_restore(struct sk_buff *skb, unsigned long refdst) */ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) { + skb_dst_check_unset(skb); skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst; } @@ -1217,6 +1225,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) */ static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { + skb_dst_check_unset(skb); WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; -- 2.50.1