This is needed in the context of Tetragon to provide improved feedback
(in contrast to just dropping packets) to east-west traffic when blocked
by policies using cgroup_skb programs.

This reuses concepts from the netfilter reject target codepath, with the
following differences:

* Packets are cloned, since the BPF user can still return SK_PASS from
  the cgroup_skb progs and the current skb needs to stay untouched
  (cgroup_skb hooks only allow read-only access to the skb payload).

* Since cgroup_skb programs are called late in the stack, checksums do
  not need to be computed or verified, and IPv4 fragmentation does not
  need to be checked (ip_local_deliver should have taken care of that
  earlier).

Signed-off-by: Mahe Tardy
---
 net/core/filter.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/net/core/filter.c b/net/core/filter.c
index 7a72f766aacf..ff7b2b175425 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -85,6 +85,8 @@
 #include
 #include
 #include
+#include <net/icmp.h>
+#include <linux/icmpv6.h>
 
 #include "dev.h"
 
@@ -12148,6 +12150,53 @@ __bpf_kfunc int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops,
 	return 0;
 }
 
+__bpf_kfunc int bpf_icmp_send_unreach(struct __sk_buff *__skb, int code)
+{
+	struct sk_buff *skb = (struct sk_buff *)__skb;
+	struct sk_buff *nskb;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		if (code < 0 || code > NR_ICMP_UNREACH)
+			return -EINVAL;
+
+		nskb = skb_clone(skb, GFP_ATOMIC);
+		if (!nskb)
+			return -ENOMEM;
+
+		if (ip_route_reply_fetch_dst(nskb) < 0) {
+			kfree_skb(nskb);
+			return -EHOSTUNREACH;
+		}
+
+		icmp_send(nskb, ICMP_DEST_UNREACH, code, 0);
+		kfree_skb(nskb);
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case htons(ETH_P_IPV6):
+		if (code < 0 || code > ICMPV6_REJECT_ROUTE)
+			return -EINVAL;
+
+		nskb = skb_clone(skb, GFP_ATOMIC);
+		if (!nskb)
+			return -ENOMEM;
+
+		if (ip6_route_reply_fetch_dst(nskb) < 0) {
+			kfree_skb(nskb);
+			return -EHOSTUNREACH;
+		}
+
+		icmpv6_send(nskb, ICMPV6_DEST_UNREACH, code, 0);
+		kfree_skb(nskb);
+		break;
+#endif
+	default:
+		return -EPROTONOSUPPORT;
+	}
+
+	return SK_DROP;
+}
+
 __bpf_kfunc_end_defs();
 
 int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
@@ -12185,6 +12234,10 @@ BTF_KFUNCS_START(bpf_kfunc_check_set_sock_ops)
 BTF_ID_FLAGS(func, bpf_sock_ops_enable_tx_tstamp, KF_TRUSTED_ARGS)
 BTF_KFUNCS_END(bpf_kfunc_check_set_sock_ops)
 
+BTF_KFUNCS_START(bpf_kfunc_check_set_icmp_send_unreach)
+BTF_ID_FLAGS(func, bpf_icmp_send_unreach, KF_TRUSTED_ARGS)
+BTF_KFUNCS_END(bpf_kfunc_check_set_icmp_send_unreach)
+
 static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
 	.owner = THIS_MODULE,
 	.set = &bpf_kfunc_check_set_skb,
@@ -12210,6 +12263,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_sock_ops = {
 	.set = &bpf_kfunc_check_set_sock_ops,
 };
 
+static const struct btf_kfunc_id_set bpf_kfunc_set_icmp_send_unreach = {
+	.owner = THIS_MODULE,
+	.set = &bpf_kfunc_check_set_icmp_send_unreach,
+};
+
 static int __init bpf_kfunc_init(void)
 {
 	int ret;
@@ -12229,6 +12287,7 @@ static int __init bpf_kfunc_init(void)
 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
 					       &bpf_kfunc_set_sock_addr);
 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_tcp_reqsk);
+	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &bpf_kfunc_set_icmp_send_unreach);
 	return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCK_OPS, &bpf_kfunc_set_sock_ops);
 }
 late_initcall(bpf_kfunc_init);
-- 
2.34.1
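
For reviewers, a minimal sketch of how a cgroup_skb program could call
the new kfunc. This snippet is illustrative only and not part of the
patch: the file name, program name, and port are made up, the locally
redefined constants (ETH_P_IP, ICMP_PORT_UNREACH) are assumptions since
vmlinux.h does not carry #define macros, and the __ksym prototype simply
mirrors the kfunc added above.

/* reject_8080.bpf.c: drop egress IPv4 TCP traffic to port 8080 and
 * signal it with an ICMP port-unreachable instead of a silent drop.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

#define ETH_P_IP		0x0800	/* from uapi/linux/if_ether.h */
#define ICMP_PORT_UNREACH	3	/* from uapi/linux/icmp.h */

/* kfunc added by this patch, declared manually on the BPF side */
int bpf_icmp_send_unreach(struct __sk_buff *skb, int code) __ksym;

SEC("cgroup_skb/egress")
int reject_8080(struct __sk_buff *skb)
{
	struct iphdr iph;
	struct tcphdr tcph;

	if (skb->protocol != bpf_htons(ETH_P_IP))
		return 1;	/* SK_PASS */

	/* cgroup_skb payload starts at the network header */
	if (bpf_skb_load_bytes(skb, 0, &iph, sizeof(iph)) < 0)
		return 1;
	if (iph.protocol != IPPROTO_TCP)
		return 1;
	if (bpf_skb_load_bytes(skb, iph.ihl * 4, &tcph, sizeof(tcph)) < 0)
		return 1;

	if (tcph.dest == bpf_htons(8080)) {
		/* best effort: drop the packet even if sending the
		 * ICMP error failed
		 */
		bpf_icmp_send_unreach(skb, ICMP_PORT_UNREACH);
		return 0;	/* SK_DROP */
	}

	return 1;
}

char LICENSE[] SEC("license") = "GPL";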