This is needed in the context of Tetragon to provide improved feedback (in contrast to just dropping packets) to east-west traffic when blocked by policies using cgroup_skb programs. This reuses concepts from the netfilter reject target codepath with the differences that: * Packets are cloned since the BPF user can still let the packet pass (SK_PASS from the cgroup_skb progs for example) and the current skb needs to stay untouched (cgroup_skb hooks only allow read-only skb payload). * We protect against recursion since the kfunc, by generating an ICMP error message, could retrigger the BPF prog that invoked it. Only ICMP_DEST_UNREACH and ICMPV6_DEST_UNREACH are currently supported. The interface accepts a type parameter to facilitate future extension to other ICMP control message types. Signed-off-by: Mahe Tardy --- net/core/filter.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index 2e96b4b847ce..f3aa494ed105 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -84,6 +84,9 @@ #include #include #include +#include +#include +#include #include "dev.h" @@ -12546,6 +12549,84 @@ __bpf_kfunc int bpf_xdp_pull_data(struct xdp_md *x, u32 len) return 0; } +/** + * bpf_icmp_send - Send an ICMP control message + * @skb_ctx: Packet that triggered the control message + * @type: ICMP type (only ICMP_DEST_UNREACH/ICMPV6_DEST_UNREACH supported) + * @code: ICMP code (0-15 except ICMP_FRAG_NEEDED for IPv4, 0-6 for IPv6) + * + * Sends an ICMP control message in response to the packet. The original packet + * is cloned before sending the ICMP message, so the BPF program can still let + * the packet pass if desired. + * + * Currently only ICMP_DEST_UNREACH (IPv4) and ICMPV6_DEST_UNREACH (IPv6) are + * supported. 
+ * + * Return: 0 on success (send attempt), negative error code on failure: + * -EBUSY: Recursion detected + * -EPROTONOSUPPORT: Non-IP protocol + * -EOPNOTSUPP: Unsupported ICMP type + * -EINVAL: Invalid code parameter + * -ENETUNREACH: Unusable IPv4 route/dst attached to the skb + * -ENOMEM: Memory allocation failed + */ +__bpf_kfunc int bpf_icmp_send(struct __sk_buff *skb_ctx, int type, int code) +{ + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + struct sk_buff *nskb; + struct sock *sk; + + sk = skb_to_full_sk(skb); + if (sk && sk->sk_kern_sock && + (sk->sk_protocol == IPPROTO_ICMP || sk->sk_protocol == IPPROTO_ICMPV6)) + return -EBUSY; + + switch (skb->protocol) { +#if IS_ENABLED(CONFIG_INET) + case htons(ETH_P_IP): { + if (type != ICMP_DEST_UNREACH) + return -EOPNOTSUPP; + if (code < 0 || code > NR_ICMP_UNREACH || + code == ICMP_FRAG_NEEDED) /* needs a valid next-hop MTU */ + return -EINVAL; + + /* icmp_send requires a rtable; test-run synthetic skbs lack one. */ + if (!skb_valid_dst(skb)) + return -ENETUNREACH; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; + + memset(IPCB(nskb), 0, sizeof(*IPCB(nskb))); + icmp_send(nskb, type, code, 0); + consume_skb(nskb); + break; + } +#endif +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + if (type != ICMPV6_DEST_UNREACH) + return -EOPNOTSUPP; + if (code < 0 || code > ICMPV6_REJECT_ROUTE) + return -EINVAL; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; + + memset(IP6CB(nskb), 0, sizeof(*IP6CB(nskb))); + icmpv6_send(nskb, type, code, 0); + consume_skb(nskb); + break; +#endif + default: + return -EPROTONOSUPPORT; + } + + return 0; +} + __bpf_kfunc_end_defs(); int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, @@ -12588,6 +12669,10 @@ BTF_KFUNCS_START(bpf_kfunc_check_set_sock_ops) BTF_ID_FLAGS(func, bpf_sock_ops_enable_tx_tstamp) BTF_KFUNCS_END(bpf_kfunc_check_set_sock_ops) +BTF_KFUNCS_START(bpf_kfunc_check_set_icmp_send) +BTF_ID_FLAGS(func, 
bpf_icmp_send) +BTF_KFUNCS_END(bpf_kfunc_check_set_icmp_send) + static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_skb, @@ -12618,6 +12703,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_sock_ops = { .set = &bpf_kfunc_check_set_sock_ops, }; +static const struct btf_kfunc_id_set bpf_kfunc_set_icmp_send = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_icmp_send, +}; + static int __init bpf_kfunc_init(void) { int ret; @@ -12639,6 +12729,7 @@ static int __init bpf_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, &bpf_kfunc_set_sock_addr); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_tcp_reqsk); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &bpf_kfunc_set_icmp_send); return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCK_OPS, &bpf_kfunc_set_sock_ops); } late_initcall(bpf_kfunc_init); -- 2.34.1