With CONFIG_STACKPROTECTOR_STRONG=y, it is better to avoid passing a pointer to an automatic variable. Change these exported functions to return 'u8 proto' instead of void. - ipv6_push_nfrag_opts() - ipv6_push_frag_opts() For instance, replace ipv6_push_frag_opts(skb, opt, &proto); with: proto = ipv6_push_frag_opts(skb, opt, proto); Note that even after this change, ip6_xmit() has to use a stack canary because of @first_hop variable. Signed-off-by: Eric Dumazet --- include/net/ipv6.h | 10 ++++---- net/ipv6/exthdrs.c | 55 +++++++++++++++++++++++-------------------- net/ipv6/ip6_output.c | 19 ++++++++------- net/ipv6/ip6_tunnel.c | 2 +- 4 files changed, 46 insertions(+), 40 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index cf2203ff2bfd2b42381aba196578cd2668796c8b..a35f0a8114c093c65d3706f53b945cb426bd212b 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1151,11 +1151,11 @@ int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); * Extension header (options) processing */ -void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, - u8 *proto, struct in6_addr **daddr_p, - struct in6_addr *saddr); -void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, - u8 *proto); +u8 ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, + u8 proto, struct in6_addr **daddr_p, + struct in6_addr *saddr); +u8 ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, + u8 proto); int ipv6_skip_exthdr(const struct sk_buff *, int start, u8 *nexthdrp, __be16 *frag_offp); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index cd318ff0dcd8784c3e3841d61d682f2c4a523e61..209fdf1b1aa9b9111f9131a75a6e18e7f9a153fe 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -1074,9 +1074,9 @@ int ipv6_parse_hopopts(struct sk_buff *skb) * for headers. 
*/ -static void ipv6_push_rthdr0(struct sk_buff *skb, u8 *proto, - struct ipv6_rt_hdr *opt, - struct in6_addr **addr_p, struct in6_addr *saddr) +static u8 ipv6_push_rthdr0(struct sk_buff *skb, u8 proto, + struct ipv6_rt_hdr *opt, + struct in6_addr **addr_p, struct in6_addr *saddr) { struct rt0_hdr *phdr, *ihdr; int hops; @@ -1095,13 +1095,13 @@ static void ipv6_push_rthdr0(struct sk_buff *skb, u8 *proto, phdr->addr[hops - 1] = **addr_p; *addr_p = ihdr->addr; - phdr->rt_hdr.nexthdr = *proto; - *proto = NEXTHDR_ROUTING; + phdr->rt_hdr.nexthdr = proto; + return NEXTHDR_ROUTING; } -static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto, - struct ipv6_rt_hdr *opt, - struct in6_addr **addr_p, struct in6_addr *saddr) +static u8 ipv6_push_rthdr4(struct sk_buff *skb, u8 proto, + struct ipv6_rt_hdr *opt, + struct in6_addr **addr_p, struct in6_addr *saddr) { struct ipv6_sr_hdr *sr_phdr, *sr_ihdr; int plen, hops; @@ -1144,58 +1144,61 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto, } #endif - sr_phdr->nexthdr = *proto; - *proto = NEXTHDR_ROUTING; + sr_phdr->nexthdr = proto; + return NEXTHDR_ROUTING; } -static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, - struct ipv6_rt_hdr *opt, - struct in6_addr **addr_p, struct in6_addr *saddr) +static u8 ipv6_push_rthdr(struct sk_buff *skb, u8 proto, + struct ipv6_rt_hdr *opt, + struct in6_addr **addr_p, struct in6_addr *saddr) { switch (opt->type) { case IPV6_SRCRT_TYPE_0: case IPV6_SRCRT_STRICT: case IPV6_SRCRT_TYPE_2: - ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr); + proto = ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr); break; case IPV6_SRCRT_TYPE_4: - ipv6_push_rthdr4(skb, proto, opt, addr_p, saddr); + proto = ipv6_push_rthdr4(skb, proto, opt, addr_p, saddr); break; default: break; } + return proto; } -static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt) +static u8 ipv6_push_exthdr(struct sk_buff *skb, u8 proto, u8 type, struct ipv6_opt_hdr *opt) { struct 
ipv6_opt_hdr *h = skb_push(skb, ipv6_optlen(opt)); memcpy(h, opt, ipv6_optlen(opt)); - h->nexthdr = *proto; - *proto = type; + h->nexthdr = proto; + return type; } -void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, - u8 *proto, - struct in6_addr **daddr, struct in6_addr *saddr) +u8 ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, + u8 proto, + struct in6_addr **daddr, struct in6_addr *saddr) { if (opt->srcrt) { - ipv6_push_rthdr(skb, proto, opt->srcrt, daddr, saddr); + proto = ipv6_push_rthdr(skb, proto, opt->srcrt, daddr, saddr); /* * IPV6_RTHDRDSTOPTS is ignored * unless IPV6_RTHDR is set (RFC3542). */ if (opt->dst0opt) - ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt); + proto = ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt); } if (opt->hopopt) - ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt); + proto = ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt); + return proto; } -void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto) +u8 ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 proto) { if (opt->dst1opt) - ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt); + proto = ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt); + return proto; } EXPORT_SYMBOL(ipv6_push_frag_opts); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f904739e99b907a5704c32452ff585479e369727..e4bed95ae5bba173c2418d402cba46e946335219 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -305,11 +305,12 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, seg_len += opt->opt_nflen + opt->opt_flen; if (opt->opt_flen) - ipv6_push_frag_opts(skb, opt, &proto); + proto = ipv6_push_frag_opts(skb, opt, proto); if (opt->opt_nflen) - ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop, - &fl6->saddr); + proto = ipv6_push_nfrag_opts(skb, opt, proto, + &first_hop, + &fl6->saddr); } if (unlikely(seg_len > 
IPV6_MAXPLEN)) { @@ -1940,11 +1941,13 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, __skb_pull(skb, skb_network_header_len(skb)); final_dst = &fl6->daddr; - if (opt && opt->opt_flen) - ipv6_push_frag_opts(skb, opt, &proto); - if (opt && opt->opt_nflen) - ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr); - + if (opt) { + if (opt->opt_flen) + proto = ipv6_push_frag_opts(skb, opt, proto); + if (opt->opt_nflen) + proto = ipv6_push_nfrag_opts(skb, opt, proto, + &final_dst, &fl6->saddr); + } skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); hdr = ipv6_hdr(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index f68f6f110a3e7ea70b9aca4ad1fcfb4d6fe15c38..6fca0868b5a237417eb4c013f713952e8922696d 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1265,7 +1265,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); - ipv6_push_frag_opts(skb, &opt.ops, &proto); + proto = ipv6_push_frag_opts(skb, &opt.ops, proto); } skb_push(skb, sizeof(struct ipv6hdr)); -- 2.53.0.rc1.225.gd81095ad13-goog 1) daddr is unlikely to be multicast in ip6_finish_output2(). 2) ip6_finish_output_gso_slowpath_drop() should not be called often. 3) ip6_fragment() should not be called often. 4) opt is unlikely to be set. 5) ip6_xmit() and ip6_forward() mostly send packets that are not too big. 6) Most __ip6_make_skb() calls are for UDP packets, not ICMPV6 ones. 
Signed-off-by: Eric Dumazet --- net/ipv6/ip6_output.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e4bed95ae5bba173c2418d402cba46e946335219..5c2cfcc3cf6e56192123c5830250865d37e72581 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -80,7 +80,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * hdr = ipv6_hdr(skb); daddr = &hdr->daddr; - if (ipv6_addr_is_multicast(daddr)) { + if (unlikely(ipv6_addr_is_multicast(daddr))) { if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) && ((mroute6_is_socket(net, skb) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || @@ -179,8 +179,8 @@ ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, static int ip6_finish_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int mtu) { - if (!(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) && - !skb_gso_validate_network_len(skb, mtu)) + if (unlikely(!(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) && + !skb_gso_validate_network_len(skb, mtu))) return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); return ip6_finish_output2(net, sk, skb); @@ -202,8 +202,8 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff if (skb_is_gso(skb)) return ip6_finish_output_gso(net, sk, skb, mtu); - if (skb->len > mtu || - (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) + if (unlikely(skb->len > mtu || + (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))) return ip6_fragment(net, sk, skb, ip6_finish_output2); return ip6_finish_output2(net, sk, skb); @@ -301,7 +301,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, } } - if (opt) { + if (unlikely(opt)) { seg_len += opt->opt_nflen + opt->opt_flen; if (opt->opt_flen) @@ -354,7 +354,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, skb->mark = mark; mtu = 
dst_mtu(dst); - if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { + if (likely((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb))) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); /* if egress device is enslaved to an L3 master device pass the @@ -654,7 +654,7 @@ int ip6_forward(struct sk_buff *skb) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (ip6_pkt_too_big(skb, mtu)) { + if (unlikely(ip6_pkt_too_big(skb, mtu))) { /* Again, force OUTPUT device used as source address */ skb->dev = dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); @@ -1368,7 +1368,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, /* * setup for corking */ - if (opt) { + if (unlikely(opt)) { if (WARN_ON(v6_cork->opt)) return -EINVAL; @@ -1885,7 +1885,7 @@ static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork) static void ip6_cork_release(struct inet_cork_full *cork, struct inet6_cork *v6_cork) { - if (v6_cork->opt) { + if (unlikely(v6_cork->opt)) { struct ipv6_txoptions *opt = v6_cork->opt; kfree(opt->dst0opt); @@ -1941,7 +1941,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, __skb_pull(skb, skb_network_header_len(skb)); final_dst = &fl6->daddr; - if (opt) { + if (unlikely(opt)) { if (opt->opt_flen) proto = ipv6_push_frag_opts(skb, opt, proto); if (opt->opt_nflen) @@ -1969,7 +1969,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, ip6_cork_steal_dst(skb, cork); IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); - if (proto == IPPROTO_ICMPV6) { + if (unlikely(proto == IPPROTO_ICMPV6)) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); u8 icmp6_type; -- 2.53.0.rc1.225.gd81095ad13-goog ip6_xmit() makes sure there is enough headroom in the skb, so it can use __skb_push() instead of the out-of-line skb_push(). 
Signed-off-by: Eric Dumazet --- net/ipv6/ip6_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5c2cfcc3cf6e56192123c5830250865d37e72581..279d4daa85c442eb3db8add1d97c57c1ffbe820f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -314,7 +314,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, } if (unlikely(seg_len > IPV6_MAXPLEN)) { - hop_jumbo = skb_push(skb, hoplen); + hop_jumbo = __skb_push(skb, hoplen); hop_jumbo->nexthdr = proto; hop_jumbo->hdrlen = 0; @@ -327,7 +327,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, IP6CB(skb)->flags |= IP6SKB_FAKEJUMBO; } - skb_push(skb, sizeof(struct ipv6hdr)); + __skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); hdr = ipv6_hdr(skb); -- 2.53.0.rc1.225.gd81095ad13-goog When a too big packet is dropped, use SKB_DROP_REASON_PKT_TOO_BIG. Signed-off-by: Eric Dumazet --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 279d4daa85c442eb3db8add1d97c57c1ffbe820f..4bb61265f1d8f870dbfd60bd22b8ab78828fb4cd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -383,7 +383,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu); IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); unlock: rcu_read_unlock(); return ret; -- 2.53.0.rc1.225.gd81095ad13-goog With CONFIG_MITIGATION_RETPOLINE=y dst_mtu() is a bit fat, because it is generic. Indeed, clang does not always inline it. Add dst4_mtu() and dst6_mtu() helpers for callers that expect either ipv4_mtu() or ip6_mtu() to be called. These helpers are always inlined. 
Signed-off-by: Eric Dumazet --- include/net/dst.h | 6 ++++++ include/net/ip6_route.h | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/include/net/dst.h b/include/net/dst.h index f8aa1239b4db639bd6b63f4ddb4ec4d7ee459ac0..307073eae7f83456aa80dfa8686f839b302ca004 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -219,6 +219,12 @@ static inline u32 dst_mtu(const struct dst_entry *dst) return INDIRECT_CALL_INET(dst->ops->mtu, ip6_mtu, ipv4_mtu, dst); } +/* Variant of dst_mtu() for IPv4 users. */ +static inline u32 dst4_mtu(const struct dst_entry *dst) +{ + return INDIRECT_CALL_1(dst->ops->mtu, ipv4_mtu, dst); +} + /* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */ static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric) { diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 7c5512baa4b2b7503494b1ae02756df29ef93666..a55f9bf95fe393311967b9b94073acb3860214aa 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -266,6 +266,12 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); +/* Variant of dst_mtu() for IPv6 users */ +static inline u32 dst6_mtu(const struct dst_entry *dst) +{ + return INDIRECT_CALL_1(dst->ops->mtu, ip6_mtu, dst); +} + static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) { const struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? -- 2.53.0.rc1.225.gd81095ad13-goog When we expect an IPv6 dst, use dst6_mtu() instead of dst_mtu() to save some code space. Due to current dst6_mtu() implementation, only convert users in IPv6 stack. 
Signed-off-by: Eric Dumazet --- net/ipv6/ip6_gre.c | 2 +- net/ipv6/ip6_output.c | 6 +++--- net/ipv6/ip6_tunnel.c | 4 ++-- net/ipv6/ipv6_sockglue.c | 4 ++-- net/ipv6/route.c | 10 ++++++---- net/ipv6/tcp_ipv6.c | 9 +++++---- 6 files changed, 19 insertions(+), 16 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d19d86ed43766bbc8ec052113be02ab231a5272c..dafcc0dcd77a5394857d6f339d83370826a25109 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1057,7 +1057,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, /* TooBig packet may have updated dst->dev's mtu */ if (!t->parms.collect_md && dst) { mtu = READ_ONCE(dst_dev(dst)->mtu); - if (dst_mtu(dst) > mtu) + if (dst6_mtu(dst) > mtu) dst->ops->update_pmtu(dst, NULL, skb, mtu, false); } err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4bb61265f1d8f870dbfd60bd22b8ab78828fb4cd..f110701d1eca1e5f763d9138a43e11e92c15412f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -353,7 +353,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, skb->priority = priority; skb->mark = mark; - mtu = dst_mtu(dst); + mtu = dst6_mtu(dst); if (likely((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb))) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); @@ -1403,10 +1403,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, v6_cork->dontfrag = ipc6->dontfrag; if (rt->dst.flags & DST_XFRM_TUNNEL) mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? - READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); + READ_ONCE(rt->dst.dev->mtu) : dst6_mtu(&rt->dst); else mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? 
- READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst)); + READ_ONCE(rt->dst.dev->mtu) : dst6_mtu(xfrm_dst_path(&rt->dst)); frag_size = READ_ONCE(np->frag_size); if (frag_size && frag_size < mtu) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 6fca0868b5a237417eb4c013f713952e8922696d..4c29aa94e86eb9439458e4b23426bfda8b3b8922 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -638,7 +638,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, /* change mtu on this route */ if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) { - if (rel_info > dst_mtu(skb_dst(skb2))) + if (rel_info > dst6_mtu(skb_dst(skb2))) goto out; skb_dst_update_pmtu_no_confirm(skb2, rel_info); @@ -1187,7 +1187,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, t->parms.name); goto tx_err_dst_release; } - mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen; + mtu = dst6_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen; if (encap_limit >= 0) { max_headroom += 8; mtu -= 8; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a61e742794f9e67ff3743ddc31341d8647d5bcdc..d784a8644ff20e6008925b3d9b8e13c380318657 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1184,7 +1184,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, rcu_read_lock(); dst = __sk_dst_get(sk); if (dst) - val = dst_mtu(dst); + val = dst6_mtu(dst); rcu_read_unlock(); if (!val) return -ENOTCONN; @@ -1283,7 +1283,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, rcu_read_lock(); dst = __sk_dst_get(sk); if (dst) - mtuinfo.ip6m_mtu = dst_mtu(dst); + mtuinfo.ip6m_mtu = dst6_mtu(dst); rcu_read_unlock(); if (!mtuinfo.ip6m_mtu) return -ENOTCONN; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3cfa7ae1294b59d14e030fb7a4ceabd4e9b673a8..c0350d97307e1ef6a3b500db32a311891d3c6e72 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2049,6 +2049,8 @@ static void 
rt6_update_exception_stamp_rt(struct rt6_info *rt) static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev, struct rt6_info *rt, int mtu) { + u32 dmtu = dst6_mtu(&rt->dst); + /* If the new MTU is lower than the route PMTU, this new MTU will be the * lowest MTU in the path: always allow updating the route PMTU to * reflect PMTU decreases. @@ -2059,10 +2061,10 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev, * handle this. */ - if (dst_mtu(&rt->dst) >= mtu) + if (dmtu >= mtu) return true; - if (dst_mtu(&rt->dst) == idev->cnf.mtu6) + if (dmtu == idev->cnf.mtu6) return true; return false; @@ -2932,7 +2934,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (mtu < IPV6_MIN_MTU) return; - if (mtu >= dst_mtu(dst)) + if (mtu >= dst6_mtu(dst)) return; if (!rt6_cache_allowed_for_pmtu(rt6)) { @@ -3248,7 +3250,7 @@ EXPORT_SYMBOL_GPL(ip6_sk_redirect); static unsigned int ip6_default_advmss(const struct dst_entry *dst) { - unsigned int mtu = dst_mtu(dst); + unsigned int mtu = dst6_mtu(dst); struct net *net; mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4ae664b05fa9171ed996bf8f3b6e7b2aaa63d5c9..f154d8720e64908083032df7d8645289e801a59d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -351,7 +351,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, static void tcp_v6_mtu_reduced(struct sock *sk) { struct dst_entry *dst; - u32 mtu; + u32 mtu, dmtu; if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) return; @@ -368,8 +368,9 @@ static void tcp_v6_mtu_reduced(struct sock *sk) if (!dst) return; - if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { - tcp_sync_mss(sk, dst_mtu(dst)); + dmtu = dst6_mtu(dst); + if (inet_csk(sk)->icsk_pmtu_cookie > dmtu) { + tcp_sync_mss(sk, dmtu); tcp_simple_retransmit(sk); } } @@ -1467,7 +1468,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * 
tcp_ca_openreq_child(newsk, dst); - tcp_sync_mss(newsk, dst_mtu(dst)); + tcp_sync_mss(newsk, dst6_mtu(dst)); newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); tcp_initialize_rcv_mss(newsk); -- 2.53.0.rc1.225.gd81095ad13-goog When we expect an IPv4 dst, use dst4_mtu() instead of dst_mtu() to save some code space. Signed-off-by: Eric Dumazet --- net/ipv4/icmp.c | 2 +- net/ipv4/ip_output.c | 4 ++-- net/ipv4/ip_sockglue.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv4/netfilter/nf_reject_ipv4.c | 2 +- net/ipv4/tcp_ipv4.c | 13 ++++++------- net/ipv6/sit.c | 2 +- 7 files changed, 13 insertions(+), 14 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 19c9c838967f761a8b3a1b07cea5c5ec932bf12d..1d362a17a1c446e8d988162b164c5096a7f5d9dc 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -945,7 +945,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, /* RFC says return as much as we can without exceeding 576 bytes. */ - room = dst_mtu(&rt->dst); + room = dst4_mtu(&rt->dst); if (room > 576) room = 576; room -= sizeof(struct iphdr) + icmp_param->replyopts.opt.optlen; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 75fcb58795bbbd97d9544c05b245675ad1d1383a..e4790cc7b5c2ec7d6893d4141ca250ec05a4a2f5 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1300,7 +1300,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, return -EFAULT; cork->fragsize = ip_sk_use_pmtu(sk) ? 
- dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); + dst4_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); if (!inetdev_valid_mtu(cork->fragsize)) return -ENETUNREACH; @@ -1439,7 +1439,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, pmtudisc = READ_ONCE(inet->pmtudisc); if (pmtudisc == IP_PMTUDISC_DO || pmtudisc == IP_PMTUDISC_PROBE || - (skb->len <= dst_mtu(&rt->dst) && + (skb->len <= dst4_mtu(&rt->dst) && ip_dont_fragment(sk, &rt->dst))) df = htons(IP_DF); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 6d9c5c20b1c4f93bd54283b5aadbb9bc61c24685..c062d9519818024e01746eec20eb3c036226f77e 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1634,7 +1634,7 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, val = 0; dst = sk_dst_get(sk); if (dst) { - val = dst_mtu(dst); + val = dst4_mtu(dst); dst_release(dst); } if (!val) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index ca9eaee4c2ef5f5cdc03608291ad1a0dc187d657..131382c388e95707f27dd7d43be9b9ee2f62c85d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1895,7 +1895,7 @@ static int ipmr_prepare_xmit(struct net *net, struct mr_table *mrt, return -1; } - if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { + if (skb->len+encap > dst4_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { /* Do not fragment multicasts. Alas, IPv4 does not * allow to send ICMP, so that packets will disappear * to blackhole. 
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index fae4aa4a5f0955849c6a4ba85bc925c33cd9e102..fecf6621f679f9d435803a7bd522b38bf3de7ef4 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -303,7 +303,7 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb, goto free_nskb; /* "Never happens" */ - if (nskb->len > dst_mtu(skb_dst(nskb))) + if (nskb->len > dst4_mtu(skb_dst(nskb))) goto free_nskb; nf_ct_attach(nskb, oldskb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ffdf52fbf6463b41d7c712f3710b681ecdf6e2d7..349ae751f9d0ea4090627717719c5afc573260fc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -374,7 +374,7 @@ void tcp_v4_mtu_reduced(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); struct dst_entry *dst; - u32 mtu; + u32 mtu, dmtu; if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) return; @@ -386,15 +386,14 @@ void tcp_v4_mtu_reduced(struct sock *sk) /* Something is about to be wrong... Remember soft error * for the case, if this connection will not able to recover. 
*/ - if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) + dmtu = dst4_mtu(dst); + if (mtu < dmtu && ip_dont_fragment(sk, dst)) WRITE_ONCE(sk->sk_err_soft, EMSGSIZE); - mtu = dst_mtu(dst); - if (inet->pmtudisc != IP_PMTUDISC_DONT && ip_sk_accept_pmtu(sk) && - inet_csk(sk)->icsk_pmtu_cookie > mtu) { - tcp_sync_mss(sk, mtu); + inet_csk(sk)->icsk_pmtu_cookie > dmtu) { + tcp_sync_mss(sk, dmtu); /* Resend the TCP packet because it's * clear that the old packet has been @@ -1760,7 +1759,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, tcp_ca_openreq_child(newsk, dst); - tcp_sync_mss(newsk, dst_mtu(dst)); + tcp_sync_mss(newsk, dst4_mtu(dst)); newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); tcp_initialize_rcv_mss(newsk); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index cf37ad9686e698bf781df84f348d89ea7160ec63..439c8a1c662520d1769c0a60a5e9ae2cc0196d15 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -962,7 +962,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (df) { - mtu = dst_mtu(&rt->dst) - t_hlen; + mtu = dst4_mtu(&rt->dst) - t_hlen; if (mtu < IPV4_MIN_MTU) { DEV_STATS_INC(dev, collisions); -- 2.53.0.rc1.225.gd81095ad13-goog All inet6_cork users also use one inet_cork_full. Reduce number of parameters and increase data locality. This saves ~275 bytes of code on x86_64. 
Signed-off-by: Eric Dumazet --- include/linux/ipv6.h | 8 ------ include/net/inet_sock.h | 10 ++++++++ include/net/ipv6.h | 6 ++--- net/ipv6/ip6_output.c | 55 ++++++++++++++++++++--------------------- net/ipv6/raw.c | 2 +- 5 files changed, 40 insertions(+), 41 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 7294e4e89b797b1c45d1920686c2553e10badb81..20aae8357dd151e8c7d6972f41e77cebf1379177 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -205,13 +205,6 @@ struct ipv6_mc_socklist; struct ipv6_ac_socklist; struct ipv6_fl_socklist; -struct inet6_cork { - struct ipv6_txoptions *opt; - u8 hop_limit; - u8 tclass; - u8 dontfrag:1; -}; - /* struct ipv6_pinfo - ipv6 private area */ struct ipv6_pinfo { /* Used in tx path (inet6_csk_route_socket(), ip6_xmit()) */ @@ -267,7 +260,6 @@ struct ipv6_pinfo { struct sk_buff *pktoptions; struct sk_buff *rxpmtu; - struct inet6_cork cork; struct ipv6_mc_socklist __rcu *ipv6_mc_list; struct ipv6_ac_socklist *ipv6_ac_list; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 903b2263ec8031565990dc52b921352e6a642b1a..7cdcbed3e5cbfd1a13698942da05b9ceabf72950 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -159,6 +159,13 @@ static inline bool inet_sk_bound_dev_eq(const struct net *net, #endif } +struct inet6_cork { + struct ipv6_txoptions *opt; + u8 hop_limit; + u8 tclass; + u8 dontfrag:1; +}; + struct inet_cork { unsigned int flags; __be32 addr; @@ -179,6 +186,9 @@ struct inet_cork { struct inet_cork_full { struct inet_cork base; struct flowi fl; +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_cork base6; +#endif }; struct ip_mc_socklist; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index a35f0a8114c093c65d3706f53b945cb426bd212b..c27b9d7aeb7cf4611a55abc133d6aaa0d1508a47 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1107,8 +1107,7 @@ void ip6_flush_pending_frames(struct sock *sk); int ip6_send_skb(struct sk_buff *skb); struct sk_buff 
*__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue, - struct inet_cork_full *cork, - struct inet6_cork *v6_cork); + struct inet_cork_full *cork); struct sk_buff *ip6_make_skb(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), @@ -1119,8 +1118,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, static inline struct sk_buff *ip6_finish_skb(struct sock *sk) { - return __ip6_make_skb(sk, &sk->sk_write_queue, &inet_sk(sk)->cork, - &inet6_sk(sk)->cork); + return __ip6_make_skb(sk, &sk->sk_write_queue, &inet_sk(sk)->cork); } int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f110701d1eca1e5f763d9138a43e11e92c15412f..e622a9e086cc0a3bc775fc87a379115bcf438e77 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1353,12 +1353,13 @@ static void ip6_append_data_mtu(unsigned int *mtu, } static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, - struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6, + struct ipcm6_cookie *ipc6, struct rt6_info *rt) { + struct ipv6_txoptions *nopt, *opt = ipc6->opt; + struct inet6_cork *v6_cork = &cork->base6; struct ipv6_pinfo *np = inet6_sk(sk); unsigned int mtu, frag_size; - struct ipv6_txoptions *nopt, *opt = ipc6->opt; /* callers pass dst together with a reference, set it first so * ip6_cork_release() can put it down even in case of an error. 
@@ -1431,17 +1432,17 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, static int __ip6_append_data(struct sock *sk, struct sk_buff_head *queue, struct inet_cork_full *cork_full, - struct inet6_cork *v6_cork, struct page_frag *pfrag, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, size_t length, int transhdrlen, unsigned int flags) { - struct sk_buff *skb, *skb_prev = NULL; + unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; + struct inet6_cork *v6_cork = &cork_full->base6; struct inet_cork *cork = &cork_full->base; struct flowi6 *fl6 = &cork_full->fl.u.ip6; - unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; + struct sk_buff *skb, *skb_prev = NULL; struct ubuf_info *uarg = NULL; int exthdrlen = 0; int dst_exthdrlen = 0; @@ -1844,7 +1845,6 @@ int ip6_append_data(struct sock *sk, struct rt6_info *rt, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); int exthdrlen; int err; @@ -1855,7 +1855,7 @@ int ip6_append_data(struct sock *sk, * setup for corking */ dst_hold(&rt->dst); - err = ip6_setup_cork(sk, &inet->cork, &np->cork, + err = ip6_setup_cork(sk, &inet->cork, ipc6, rt); if (err) return err; @@ -1869,7 +1869,7 @@ int ip6_append_data(struct sock *sk, } return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork, - &np->cork, sk_page_frag(sk), getfrag, + sk_page_frag(sk), getfrag, from, length, transhdrlen, flags); } EXPORT_SYMBOL_GPL(ip6_append_data); @@ -1882,9 +1882,10 @@ static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork) skb_dst_set(skb, dst); } -static void ip6_cork_release(struct inet_cork_full *cork, - struct inet6_cork *v6_cork) +static void ip6_cork_release(struct inet_cork_full *cork) { + struct inet6_cork *v6_cork = &cork->base6; + if (unlikely(v6_cork->opt)) { struct ipv6_txoptions *opt = v6_cork->opt; @@ -1904,15 +1905,14 @@ static void ip6_cork_release(struct inet_cork_full 
*cork, struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue, - struct inet_cork_full *cork, - struct inet6_cork *v6_cork) + struct inet_cork_full *cork) { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct in6_addr *final_dst; struct net *net = sock_net(sk); struct ipv6hdr *hdr; - struct ipv6_txoptions *opt = v6_cork->opt; + struct ipv6_txoptions *opt; struct rt6_info *rt = dst_rt6_info(cork->base.dst); struct flowi6 *fl6 = &cork->fl.u.ip6; unsigned char proto = fl6->flowi6_proto; @@ -1941,6 +1941,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, __skb_pull(skb, skb_network_header_len(skb)); final_dst = &fl6->daddr; + opt = cork->base6.opt; if (unlikely(opt)) { if (opt->opt_flen) proto = ipv6_push_frag_opts(skb, opt, proto); @@ -1952,10 +1953,10 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - ip6_flow_hdr(hdr, v6_cork->tclass, + ip6_flow_hdr(hdr, cork->base6.tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, ip6_autoflowlabel(net, sk), fl6)); - hdr->hop_limit = v6_cork->hop_limit; + hdr->hop_limit = cork->base6.hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; hdr->daddr = *final_dst; @@ -1982,7 +1983,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } - ip6_cork_release(cork, v6_cork); + ip6_cork_release(cork); out: return skb; } @@ -2021,8 +2022,7 @@ EXPORT_SYMBOL_GPL(ip6_push_pending_frames); static void __ip6_flush_pending_frames(struct sock *sk, struct sk_buff_head *queue, - struct inet_cork_full *cork, - struct inet6_cork *v6_cork) + struct inet_cork_full *cork) { struct sk_buff *skb; @@ -2033,13 +2033,13 @@ static void __ip6_flush_pending_frames(struct sock *sk, kfree_skb(skb); } - ip6_cork_release(cork, v6_cork); + ip6_cork_release(cork); } void ip6_flush_pending_frames(struct sock *sk) { __ip6_flush_pending_frames(sk, &sk->sk_write_queue, - &inet_sk(sk)->cork, &inet6_sk(sk)->cork); + 
&inet_sk(sk)->cork); } EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); @@ -2050,9 +2050,8 @@ struct sk_buff *ip6_make_skb(struct sock *sk, struct ipcm6_cookie *ipc6, struct rt6_info *rt, unsigned int flags, struct inet_cork_full *cork) { - struct inet6_cork v6_cork; - struct sk_buff_head queue; int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); + struct sk_buff_head queue; int err; if (flags & MSG_PROBE) { @@ -2065,21 +2064,21 @@ struct sk_buff *ip6_make_skb(struct sock *sk, cork->base.flags = 0; cork->base.addr = 0; cork->base.opt = NULL; - v6_cork.opt = NULL; - err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt); + cork->base6.opt = NULL; + err = ip6_setup_cork(sk, cork, ipc6, rt); if (err) { - ip6_cork_release(cork, &v6_cork); + ip6_cork_release(cork); return ERR_PTR(err); } - err = __ip6_append_data(sk, &queue, cork, &v6_cork, + err = __ip6_append_data(sk, &queue, cork, &current->task_frag, getfrag, from, length + exthdrlen, transhdrlen + exthdrlen, flags); if (err) { - __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork); + __ip6_flush_pending_frames(sk, &queue, cork); return ERR_PTR(err); } - return __ip6_make_skb(sk, &queue, cork, &v6_cork); + return __ip6_make_skb(sk, &queue, cork); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index b4cd05dba9b6d209bfb1024b7e1b0c703f906e5b..ee6beba03e9b498c8ea5b9e5834e26251f81e958 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -529,7 +529,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, offset = rp->offset; total_len = inet_sk(sk)->cork.base.length; - opt = inet6_sk(sk)->cork.opt; + opt = inet_sk(sk)->cork.base6.opt; total_len -= opt ? opt->opt_flen : 0; if (offset >= total_len - 1) { -- 2.53.0.rc1.225.gd81095ad13-goog