Add a dynptr type, similar to skb dynptr, but for the skb metadata. The dynptr provides an alternative to __sk_buff->data_meta for accessing the custom metadata area allocated using the bpf_xdp_adjust_meta() helper. More importantly, it abstracts away where the storage for the custom metadata lives, which opens up the way to persist the metadata by relocating it as the skb travels through the network stack layers. A notable difference between the skb and the skb_meta dynptr is that writes to the skb_meta dynptr don't invalidate either skb or skb_meta dynptr slices, since they cannot lead to a skb->head reallocation. skb_meta dynptr ops are stubbed out and implemented by subsequent changes. Signed-off-by: Jakub Sitnicki --- include/linux/bpf.h | 14 +++++++++++++- kernel/bpf/helpers.c | 7 +++++++ kernel/bpf/log.c | 2 ++ kernel/bpf/verifier.c | 23 +++++++++++++++++++---- net/core/filter.c | 32 ++++++++++++++++++++++++++++++++ net/sched/bpf_qdisc.c | 1 + 6 files changed, 74 insertions(+), 5 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bc887831eaa5..993860e8d66a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -749,12 +749,15 @@ enum bpf_type_flag { */ MEM_WRITE = BIT(18 + BPF_BASE_TYPE_BITS), + /* DYNPTR points to skb_metadata_end()-skb_metadata_len() */ + DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; #define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \ - | DYNPTR_TYPE_XDP) + | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META) /* Max number of base types. */ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -1348,6 +1351,8 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_SKB, /* Underlying data is a xdp_buff */ BPF_DYNPTR_TYPE_XDP, + /* Points to skb_metadata_end()-skb_metadata_len() */ + BPF_DYNPTR_TYPE_SKB_META, }; int bpf_dynptr_check_size(u32 size); @@ -3480,6 +3485,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, u32 *target_size); int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, struct bpf_dynptr *ptr); +int bpf_dynptr_from_skb_meta_rdonly(struct __sk_buff *skb, u64 flags, + struct bpf_dynptr *ptr); #else static inline bool bpf_sock_common_is_valid_access(int off, int size, enum bpf_access_type type, @@ -3506,6 +3513,11 @@ static inline int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, { return -EOPNOTSUPP; } +static inline int bpf_dynptr_from_skb_meta_rdonly(struct __sk_buff *skb, u64 flags, + struct bpf_dynptr *ptr) +{ + return -EOPNOTSUPP; +} #endif #ifdef CONFIG_INET diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 3d33181d5e67..e25a6d44efd6 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1775,6 +1775,8 @@ static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *s return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len); case BPF_DYNPTR_TYPE_XDP: return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len); + case BPF_DYNPTR_TYPE_SKB_META: + return -EOPNOTSUPP; /* not implemented */ default: WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type); return -EFAULT; @@ -1831,6 +1833,8 @@ int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src, if (flags) return -EINVAL; return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len); + case BPF_DYNPTR_TYPE_SKB_META: + return -EOPNOTSUPP; /* not implemented */ default: WARN_ONCE(true, "bpf_dynptr_write: unknown
dynptr type %d\n", type); return -EFAULT; @@ -1877,6 +1881,7 @@ BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u3 return (unsigned long)(ptr->data + ptr->offset + offset); case BPF_DYNPTR_TYPE_SKB: case BPF_DYNPTR_TYPE_XDP: + case BPF_DYNPTR_TYPE_SKB_META: /* skb and xdp dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */ return 0; default: @@ -2705,6 +2710,8 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset, bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__opt, len, false); return buffer__opt; } + case BPF_DYNPTR_TYPE_SKB_META: + return NULL; /* not implemented */ default: WARN_ONCE(true, "unknown dynptr type %d\n", type); return NULL; diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 38050f4ee400..e4983c1303e7 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -498,6 +498,8 @@ const char *dynptr_type_str(enum bpf_dynptr_type type) return "skb"; case BPF_DYNPTR_TYPE_XDP: return "xdp"; + case BPF_DYNPTR_TYPE_SKB_META: + return "skb_meta"; case BPF_DYNPTR_TYPE_INVALID: return ""; default: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e2fcea860755..d0a12397d42b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -674,6 +674,8 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) return BPF_DYNPTR_TYPE_SKB; case DYNPTR_TYPE_XDP: return BPF_DYNPTR_TYPE_XDP; + case DYNPTR_TYPE_SKB_META: + return BPF_DYNPTR_TYPE_SKB_META; default: return BPF_DYNPTR_TYPE_INVALID; } @@ -690,6 +692,8 @@ static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type) return DYNPTR_TYPE_SKB; case BPF_DYNPTR_TYPE_XDP: return DYNPTR_TYPE_XDP; + case BPF_DYNPTR_TYPE_SKB_META: + return DYNPTR_TYPE_SKB_META; default: return 0; } @@ -2274,7 +2278,8 @@ static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg) static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg) { return base_type(reg->type) == PTR_TO_MEM && - (reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP); + (reg->type & + (DYNPTR_TYPE_SKB | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META)); } /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. 
*/ @@ -12189,6 +12194,7 @@ enum special_kfunc_type { KF_bpf_rbtree_right, KF_bpf_dynptr_from_skb, KF_bpf_dynptr_from_xdp, + KF_bpf_dynptr_from_skb_meta, KF_bpf_dynptr_slice, KF_bpf_dynptr_slice_rdwr, KF_bpf_dynptr_clone, @@ -12238,9 +12244,11 @@ BTF_ID(func, bpf_rbtree_right) #ifdef CONFIG_NET BTF_ID(func, bpf_dynptr_from_skb) BTF_ID(func, bpf_dynptr_from_xdp) +BTF_ID(func, bpf_dynptr_from_skb_meta) #else BTF_ID_UNUSED BTF_ID_UNUSED +BTF_ID_UNUSED #endif BTF_ID(func, bpf_dynptr_slice) BTF_ID(func, bpf_dynptr_slice_rdwr) @@ -13214,6 +13222,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ dynptr_arg_type |= DYNPTR_TYPE_SKB; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) { dynptr_arg_type |= DYNPTR_TYPE_XDP; + } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) { + dynptr_arg_type |= DYNPTR_TYPE_SKB_META; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] && (dynptr_arg_type & MEM_UNINIT)) { enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type; @@ -21788,12 +21798,17 @@ static void specialize_kfunc(struct bpf_verifier_env *env, if (offset) return; - if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) { + if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb] || + func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) { seen_direct_write = env->seen_direct_write; is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE); - if (is_rdonly) - *addr = (unsigned long)bpf_dynptr_from_skb_rdonly; + if (is_rdonly) { + if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) + *addr = (unsigned long)bpf_dynptr_from_skb_rdonly; + else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) + *addr = (unsigned long)bpf_dynptr_from_skb_meta_rdonly; + } /* restore env->seen_direct_write to its original value, since * may_access_direct_pkt_data mutates it diff --git a/net/core/filter.c b/net/core/filter.c index 7a72f766aacf..e4a1f50904db 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11978,6 +11978,25 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return func; } +static int dynptr_from_skb_meta(struct __sk_buff *skb_, u64 flags, + struct bpf_dynptr *ptr_, bool rdonly) +{ + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr_; + struct sk_buff *skb = (struct sk_buff *)skb_; + + if (flags) { + bpf_dynptr_set_null(ptr); + return -EINVAL; + } + + bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB_META, 0, skb_metadata_len(skb)); + + if (rdonly) + bpf_dynptr_set_rdonly(ptr); + + return 0; +} + __bpf_kfunc_start_defs(); __bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags, struct bpf_dynptr *ptr__uninit) @@ -11995,6 +12014,12 @@ __bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags, return 0; } +__bpf_kfunc int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, u64 flags, + struct bpf_dynptr *ptr__uninit) +{ + return dynptr_from_skb_meta(skb, flags, ptr__uninit, false); +} + __bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_md *x, u64 flags, struct bpf_dynptr *ptr__uninit) { @@ -12165,8 +12190,15 @@ int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, return 0; } +int bpf_dynptr_from_skb_meta_rdonly(struct __sk_buff *skb, u64 flags, + struct bpf_dynptr *ptr__uninit) +{ + return dynptr_from_skb_meta(skb, flags, ptr__uninit, true); +} + BTF_KFUNCS_START(bpf_kfunc_check_set_skb) BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_dynptr_from_skb_meta, 
KF_TRUSTED_ARGS) BTF_KFUNCS_END(bpf_kfunc_check_set_skb) BTF_KFUNCS_START(bpf_kfunc_check_set_xdp) diff --git a/net/sched/bpf_qdisc.c b/net/sched/bpf_qdisc.c index 7ea8b54b2ab1..946c62d552cc 100644 --- a/net/sched/bpf_qdisc.c +++ b/net/sched/bpf_qdisc.c @@ -277,6 +277,7 @@ BTF_ID_FLAGS(func, bpf_skb_get_hash, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_kfree_skb, KF_RELEASE) BTF_ID_FLAGS(func, bpf_qdisc_skb_drop, KF_RELEASE) BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_dynptr_from_skb_meta, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_qdisc_watchdog_schedule, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_qdisc_init_prologue, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_qdisc_reset_destroy_epilogue, KF_TRUSTED_ARGS) -- 2.43.0 Make it possible to read from the skb metadata area using the bpf_dynptr_read() BPF helper. This prepares the ground for access to skb metadata from all BPF hooks which operate on the __sk_buff context. Signed-off-by: Jakub Sitnicki --- include/linux/filter.h | 8 ++++++++ kernel/bpf/helpers.c | 2 +- net/core/filter.c | 12 ++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index eca229752cbe..de0d1ce34f0d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1772,6 +1772,8 @@ int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len); void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len); void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, unsigned long len, bool flush); +int bpf_skb_meta_load_bytes(const struct sk_buff *src, u32 offset, + void *dst, u32 len); #else /* CONFIG_NET */ static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) @@ -1806,6 +1808,12 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi unsigned long len, bool flush) { } + +static inline int bpf_skb_meta_load_bytes(const struct sk_buff *src, u32 offset, + void *dst, u32 len) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_NET */ #endif /* __LINUX_FILTER_H__ */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index e25a6d44efd6..54b0e8dd747e 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1776,7 +1776,7 @@ static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *s case BPF_DYNPTR_TYPE_XDP: return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len); case BPF_DYNPTR_TYPE_SKB_META: - return -EOPNOTSUPP; /* not implemented */ + return bpf_skb_meta_load_bytes(src->data, src->offset + offset, dst, len); default: WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type); return -EFAULT; diff --git a/net/core/filter.c b/net/core/filter.c index e4a1f50904db..93524839a49f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11978,6 +11978,18 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return func; } +int bpf_skb_meta_load_bytes(const struct sk_buff *skb, u32 offset, + void *dst, u32 len) +{ + u32 meta_len = skb_metadata_len(skb); + + if (len > meta_len || offset > meta_len - len) + return -E2BIG; /* out of bounds */ + + memmove(dst, skb_metadata_end(skb) - meta_len + offset, len); + return 0; +} + static int dynptr_from_skb_meta(struct __sk_buff *skb_, u64 flags, struct bpf_dynptr *ptr_, bool rdonly) { -- 2.43.0 Make it possible to write to the skb metadata area using the bpf_dynptr_write() BPF helper.
This prepares the ground for access to skb metadata from all BPF hooks which operate on the __sk_buff context. Signed-off-by: Jakub Sitnicki --- include/linux/filter.h | 8 ++++++++ kernel/bpf/helpers.c | 2 +- net/core/filter.c | 12 ++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index de0d1ce34f0d..7709e30ce2bb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1774,6 +1774,8 @@ void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, unsigned long len, bool flush); int bpf_skb_meta_load_bytes(const struct sk_buff *src, u32 offset, void *dst, u32 len); +int bpf_skb_meta_store_bytes(struct sk_buff *dst, u32 offset, + const void *src, u32 len); #else /* CONFIG_NET */ static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) @@ -1814,6 +1816,12 @@ static inline int bpf_skb_meta_load_bytes(const struct sk_buff *src, u32 offset, { return -EOPNOTSUPP; } + +static inline int bpf_skb_meta_store_bytes(struct sk_buff *dst, u32 offset, + const void *src, u32 len) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_NET */ #endif /* __LINUX_FILTER_H__ */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 54b0e8dd747e..a264a6a3b4e4 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1834,7 +1834,7 @@ int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src, return -EINVAL; return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len); case BPF_DYNPTR_TYPE_SKB_META: - return -EOPNOTSUPP; /* not implemented */ + return bpf_skb_meta_store_bytes(dst->data, dst->offset + offset, src, len); default: WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type); return -EFAULT; diff --git a/net/core/filter.c b/net/core/filter.c index 93524839a49f..86b1572e8424 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11990,6 +11990,18 @@ int bpf_skb_meta_load_bytes(const struct sk_buff *skb, u32 offset, void *dst, u32 len) return 0; } +int bpf_skb_meta_store_bytes(struct sk_buff *dst, u32 offset, + const void *src, u32 len) +{ + u32 meta_len = skb_metadata_len(dst); + + if (len > meta_len || offset > meta_len - len) + return -E2BIG; /* out of bounds */ + + memmove(skb_metadata_end(dst) - meta_len + offset, src, len); + return 0; +} + static int dynptr_from_skb_meta(struct __sk_buff *skb_, u64 flags, struct bpf_dynptr *ptr_, bool rdonly) { -- 2.43.0 Make it possible to read from or write to the skb metadata area using dynptr slices created with bpf_dynptr_slice() or bpf_dynptr_slice_rdwr(). This prepares the ground for access to skb metadata from all BPF hooks which operate on the __sk_buff context.
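For illustration, here is how a TC program could consume the metadata area through the new dynptr API. This is a minimal sketch rather than part of the patch: the program name and the assumed 4-byte metadata layout are made up for the example, and the kfunc declarations mirror what the selftests keep in bpf_kfuncs.h:

  #include <linux/bpf.h>
  #include <linux/pkt_cls.h>
  #include <bpf/bpf_helpers.h>

  extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags,
                                      struct bpf_dynptr *ptr__uninit) __ksym __weak;
  extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset,
                                     void *buffer, __u32 buffer__szk) __ksym __weak;

  SEC("tc")
  int mark_from_meta(struct __sk_buff *ctx)
  {
          struct bpf_dynptr meta;
          __u32 *val;

          bpf_dynptr_from_skb_meta(ctx, 0, &meta);
          /* NULL when less than 4 bytes of metadata are present */
          val = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*val));
          if (!val)
                  return TC_ACT_OK;

          ctx->mark = *val;       /* consume the metadata... */
          *val = 0;               /* ...and scrub it in place */

          return TC_ACT_OK;
  }

  char _license[] SEC("license") = "GPL";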
Signed-off-by: Jakub Sitnicki --- include/linux/filter.h | 6 ++++++ kernel/bpf/helpers.c | 2 +- net/core/filter.c | 10 ++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 7709e30ce2bb..a28c3a1593c9 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1776,6 +1776,7 @@ int bpf_skb_meta_load_bytes(const struct sk_buff *src, u32 offset, void *dst, u32 len); int bpf_skb_meta_store_bytes(struct sk_buff *dst, u32 offset, const void *src, u32 len); +void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset, u32 len); #else /* CONFIG_NET */ static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) @@ -1822,6 +1823,11 @@ static inline int bpf_skb_meta_store_bytes(struct sk_buff *dst, u32 offset, { return -EOPNOTSUPP; } + +static inline void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset, u32 len) +{ + return NULL; +} #endif /* CONFIG_NET */ #endif /* __LINUX_FILTER_H__ */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a264a6a3b4e4..f599a254b284 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2711,7 +2711,7 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset, return buffer__opt; } case BPF_DYNPTR_TYPE_SKB_META: - return NULL; /* not implemented */ + return bpf_skb_meta_pointer(ptr->data, ptr->offset + offset, len); default: WARN_ONCE(true, "unknown dynptr type %d\n", type); return NULL; diff --git a/net/core/filter.c b/net/core/filter.c index 86b1572e8424..d9cd73b70b2a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -12002,6 +12002,16 @@ int bpf_skb_meta_store_bytes(struct sk_buff *dst, u32 offset, return 0; } +void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset, u32 len) +{ + u32 meta_len = skb_metadata_len(skb); + + if (len > meta_len || offset > meta_len - len) + return NULL; /* out of bounds */ + + return skb_metadata_end(skb) - meta_len + offset; +} + static int dynptr_from_skb_meta(struct __sk_buff *skb_, u64 flags, struct bpf_dynptr *ptr_, bool rdonly) { -- 2.43.0 With the introduction of bpf_dynptr_from_skb_meta, all BPF programs authorized to call skb kfuncs (bpf_kfunc_set_skb) now have access to the skb metadata area. These programs can read up to skb_shinfo(skb)->meta_len bytes located just before skb_mac_header(skb), regardless of what data is currently there. However, as the network stack processes the skb, headers may be added or removed. Hence, we cannot assume that skb_mac_header() always marks the end of the metadata area. To avoid potential pitfalls, reset the skb metadata length to zero before passing the skb to the protocol layers. This is a temporary measure until we can make metadata persist through protocol processing. The change is backward compatible as today only TC BPF programs can access skb metadata through the __sk_buff->data_meta pointer. Signed-off-by: Jakub Sitnicki --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/dev.c b/net/core/dev.c index 7969fddc94e3..6546ee7c3799 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5839,6 +5839,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, } #endif skb_reset_redirect(skb); + skb_metadata_clear(skb); skip_classify: if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; -- 2.43.0 dynptr for skb metadata behaves the same way as the dynptr for skb data with one exception - writes to skb_meta dynptr don't invalidate existing skb and skb_meta slices. 
Repeat the tests which are specific to skb dynptr for the skb_meta dynptr and add a couple of new tests (skb_data_valid_*) to ensure we don't invalidate the slices in the mentioned case. Signed-off-by: Jakub Sitnicki --- tools/testing/selftests/bpf/prog_tests/dynptr.c | 3 + tools/testing/selftests/bpf/progs/dynptr_fail.c | 297 +++++++++++++++++++++ tools/testing/selftests/bpf/progs/dynptr_success.c | 62 +++++ 3 files changed, 362 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index f2b65398afce..375962f62f5d 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -31,7 +31,9 @@ static struct { {"test_dynptr_memset_xdp_chunks", SETUP_XDP_PROG}, {"test_ringbuf", SETUP_SYSCALL_SLEEP}, {"test_skb_readonly", SETUP_SKB_PROG}, + {"test_skb_meta_readonly", SETUP_SKB_PROG}, {"test_dynptr_skb_data", SETUP_SKB_PROG}, + {"test_dynptr_skb_meta_data", SETUP_SKB_PROG}, {"test_adjust", SETUP_SYSCALL_SLEEP}, {"test_adjust_err", SETUP_SYSCALL_SLEEP}, {"test_zero_size_dynptr", SETUP_SYSCALL_SLEEP}, @@ -41,6 +43,7 @@ static struct { {"test_dynptr_skb_no_buff", SETUP_SKB_PROG}, {"test_dynptr_skb_strcmp", SETUP_SKB_PROG}, {"test_dynptr_skb_tp_btf", SETUP_SKB_PROG_TP}, + {"test_dynptr_skb_meta_tp_btf", SETUP_SKB_PROG_TP}, {"test_probe_read_user_dynptr", SETUP_XDP_PROG}, {"test_probe_read_kernel_dynptr", SETUP_XDP_PROG}, {"test_probe_read_user_str_dynptr", SETUP_XDP_PROG}, diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index bd8f15229f5c..03c15315360b 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -269,6 +269,26 @@ int data_slice_out_of_bounds_skb(struct __sk_buff *skb) return SK_PASS; } +/* A metadata slice can't be accessed out of bounds */ +SEC("?tc") +__failure __msg("value is outside of the allowed memory range") +int data_slice_out_of_bounds_skb_meta(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + /* this should fail */ + *(md + 1) = 42; + + return SK_PASS; +} + SEC("?raw_tp") __failure __msg("value is outside of the allowed memory range") int data_slice_out_of_bounds_map_value(void *ctx) @@ -1089,6 +1109,26 @@ int skb_invalid_slice_write(struct __sk_buff *skb) return SK_PASS; } +/* bpf_dynptr_slice()s are read-only and cannot be written to */ +SEC("?tc") +__failure __msg("R{{[0-9]+}} cannot write into rdonly_mem") +int skb_meta_invalid_slice_write(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + /* The read-only data slice is invalidated whenever a helper changes packet data */ SEC("?tc") __failure __msg("invalid mem access 'scalar'") @@ -1115,6 +1155,29 @@ int skb_invalid_data_slice1(struct __sk_buff *skb) return SK_PASS; } +/* Read-only skb metadata slice is invalidated whenever a helper changes packet data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int skb_meta_invalid_data_slice1(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + 
return SK_DROP; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + val = *md; + + return SK_PASS; +} + /* The read-write data slice is invalidated whenever a helper changes packet data */ SEC("?tc") __failure __msg("invalid mem access 'scalar'") @@ -1141,6 +1204,29 @@ int skb_invalid_data_slice2(struct __sk_buff *skb) return SK_PASS; } +/* Read-write skb metadata slice is invalidated whenever a helper changes packet data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int skb_meta_invalid_data_slice2(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + /* The read-only data slice is invalidated whenever bpf_dynptr_write() is called */ SEC("?tc") __failure __msg("invalid mem access 'scalar'") @@ -1167,6 +1253,74 @@ int skb_invalid_data_slice3(struct __sk_buff *skb) return SK_PASS; } +/* Read-only skb metadata slice is invalidated on write to skb data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int skb_meta_invalid_data_slice3(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *md; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&data, 0, "x", 1, 0); + + /* this should fail */ + val = *md; + + return SK_PASS; +} + +/* Read-only skb data slice is _not_ invalidated on write to skb metadata */ +SEC("?tc") +__success +int skb_valid_data_slice3(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *d; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + d = bpf_dynptr_slice(&data, 0, NULL, sizeof(*d)); + if (!d) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should succeed */ + val = *d; + + return SK_PASS; +} + +/* Read-only skb metadata slice is _not_ invalidated on write to skb metadata */ +SEC("?tc") +__success +int skb_meta_valid_data_slice3(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should succeed */ + val = *md; + + return SK_PASS; +} + /* The read-write data slice is invalidated whenever bpf_dynptr_write() is called */ SEC("?tc") __failure __msg("invalid mem access 'scalar'") @@ -1192,6 +1346,74 @@ int skb_invalid_data_slice4(struct __sk_buff *skb) return SK_PASS; } +/* Read-write skb metadata slice is invalidated on write to skb data slice */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int skb_meta_invalid_data_slice4(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *md; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&data, 0, "x", 1, 0); + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + +/* Read-write skb data slice is _not_ invalidated on write to skb metadata */ +SEC("?tc") +__success +int skb_valid_data_slice4(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *d; + + bpf_dynptr_from_skb(skb, 0, &data); + 
bpf_dynptr_from_skb_meta(skb, 0, &meta); + + d = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*d)); + if (!d) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should succeed */ + *d = 42; + + return SK_PASS; +} + +/* Read-write skb metadata slice is _not_ invalidated on write to skb metadata */ +SEC("?tc") +__success +int skb_meta_valid_data_slice4(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should succeed */ + *md = 42; + + return SK_PASS; +} + /* The read-only data slice is invalidated whenever a helper changes packet data */ SEC("?xdp") __failure __msg("invalid mem access 'scalar'") @@ -1255,6 +1477,19 @@ int skb_invalid_ctx(void *ctx) return 0; } +/* Only supported prog type can create skb_meta-type dynptrs */ +SEC("?raw_tp") +__failure __msg("calling kernel function bpf_dynptr_from_skb_meta is not allowed") +int skb_meta_invalid_ctx(void *ctx) +{ + struct bpf_dynptr meta; + + /* this should fail */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + + return 0; +} + SEC("fentry/skb_tx_error") __failure __msg("must be referenced or trusted") int BPF_PROG(skb_invalid_ctx_fentry, void *skb) @@ -1267,6 +1502,18 @@ int BPF_PROG(skb_invalid_ctx_fentry, void *skb) return 0; } +SEC("fentry/skb_tx_error") +__failure __msg("must be referenced or trusted") +int BPF_PROG(skb_meta_invalid_ctx_fentry, void *skb) +{ + struct bpf_dynptr meta; + + /* this should fail */ + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + return 0; +} + SEC("fexit/skb_tx_error") __failure __msg("must be referenced or trusted") int BPF_PROG(skb_invalid_ctx_fexit, void *skb) @@ -1279,6 +1526,18 @@ int BPF_PROG(skb_invalid_ctx_fexit, void *skb) return 0; } +SEC("fexit/skb_tx_error") +__failure __msg("must be referenced or trusted") +int BPF_PROG(skb_meta_invalid_ctx_fexit, void *skb) +{ + struct bpf_dynptr meta; + + /* this should fail */ + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + return 0; +} + /* Reject writes to dynptr slot for uninit arg */ SEC("?raw_tp") __failure __msg("potential write to dynptr at off=-16") @@ -1405,6 +1664,22 @@ int invalid_slice_rdwr_rdonly(struct __sk_buff *skb) return 0; } +SEC("cgroup_skb/ingress") +__failure __msg("the prog does not allow writes to packet data") +int invalid_slice_rdwr_rdonly_skb_meta(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *p; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + /* this should fail */ + p = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*p)); + __sink(p); + + return 0; +} + /* bpf_dynptr_adjust can only be called on initialized dynptrs */ SEC("?raw_tp") __failure __msg("Expected an initialized dynptr as arg #0") @@ -1665,6 +1940,28 @@ int clone_skb_packet_data(struct __sk_buff *skb) return 0; } +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int clone_skb_packet_meta(struct __sk_buff *skb) +{ + struct bpf_dynptr clone, meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + bpf_dynptr_clone(&meta, &clone); + md = bpf_dynptr_slice_rdwr(&clone, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + *md = 42; + + return 0; +} + /* A xdp clone's data slices should be invalid anytime packet data changes */ SEC("?xdp") __failure __msg("invalid mem access 'scalar'") diff --git 
a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index 7d7081d05d47..680c58484cba 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -188,6 +188,26 @@ int test_skb_readonly(struct __sk_buff *skb) return 1; } +SEC("?cgroup_skb/egress") +int test_skb_meta_readonly(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + int ret; + + err = 1; + ret = bpf_dynptr_from_skb_meta(skb, 0, &meta); + if (ret) + return 1; + + err = 2; + ret = bpf_dynptr_write(&meta, 0, "x", 1, 0); + if (ret != -EINVAL) + return 1; + + err = 0; + return 1; +} + SEC("?cgroup_skb/egress") int test_dynptr_skb_data(struct __sk_buff *skb) { @@ -209,6 +229,27 @@ int test_dynptr_skb_data(struct __sk_buff *skb) return 1; } +SEC("?cgroup_skb/egress") +int test_dynptr_skb_meta_data(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + int ret; + + err = 1; + ret = bpf_dynptr_from_skb_meta(skb, 0, &meta); + if (ret) + return 1; + + err = 2; + md = bpf_dynptr_data(&meta, 0, sizeof(*md)); + if (md) + return 1; + + err = 0; + return 1; +} + SEC("tp/syscalls/sys_enter_nanosleep") int test_adjust(void *ctx) { @@ -567,6 +608,27 @@ int BPF_PROG(test_dynptr_skb_tp_btf, void *skb, void *location) return 1; } +SEC("tp_btf/kfree_skb") +int BPF_PROG(test_dynptr_skb_meta_tp_btf, void *skb, void *location) +{ + struct bpf_dynptr meta; + int ret; + + err = 1; + ret = bpf_dynptr_from_skb_meta(skb, 0, &meta); + if (ret) + return 1; + + /* Expect write failure. tp_btf skb is readonly. */ + err = 2; + ret = bpf_dynptr_write(&meta, 0, "x", 1, 0); + if (ret != -EINVAL) + return 1; + + err = 0; + return 1; +} + static inline int bpf_memcmp(const char *a, const char *b, u32 size) { int i; -- 2.43.0 Prepare for parametrizing the xdp_context tests. The assert_test_result helper doesn't need the whole skeleton. Pass just what it needs. Signed-off-by: Jakub Sitnicki --- tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index b9d9f0a502ce..0134651d94ab 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -156,15 +156,14 @@ static int send_test_packet(int ifindex) return -1; } -static void assert_test_result(struct test_xdp_meta *skel) +static void assert_test_result(const struct bpf_map *result_map) { int err; __u32 map_key = 0; __u8 map_value[TEST_PAYLOAD_LEN]; - err = bpf_map__lookup_elem(skel->maps.test_result, &map_key, - sizeof(map_key), &map_value, - TEST_PAYLOAD_LEN, BPF_ANY); + err = bpf_map__lookup_elem(result_map, &map_key, sizeof(map_key), + &map_value, TEST_PAYLOAD_LEN, BPF_ANY); if (!ASSERT_OK(err, "lookup test_result")) return; @@ -248,7 +247,7 @@ void test_xdp_context_veth(void) if (!ASSERT_OK(ret, "send_test_packet")) goto close; - assert_test_result(skel); + assert_test_result(skel->maps.test_result); close: close_netns(nstoken); @@ -313,7 +312,7 @@ void test_xdp_context_tuntap(void) if (!ASSERT_EQ(ret, sizeof(packet), "write packet")) goto close; - assert_test_result(skel); + assert_test_result(skel->maps.test_result); close: if (tap_fd >= 0) -- 2.43.0 We want to add more test cases to cover different ways to access the metadata area. Prepare for it. Pull up the skeleton management. 
Signed-off-by: Jakub Sitnicki --- .../bpf/prog_tests/xdp_context_test_run.c | 31 +++++++++++++++------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 0134651d94ab..6c66e27e5bc7 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -256,12 +256,13 @@ void test_xdp_context_veth(void) netns_free(tx_ns); } -void test_xdp_context_tuntap(void) +static void test_tuntap(struct bpf_program *xdp_prog, + struct bpf_program *tc_prog, + struct bpf_map *result_map) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); struct netns_obj *ns = NULL; - struct test_xdp_meta *skel = NULL; __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; int tap_fd = -1; int tap_ifindex; @@ -277,10 +278,6 @@ void test_xdp_context_tuntap(void) SYS(close, "ip link set dev " TAP_NAME " up"); - skel = test_xdp_meta__open_and_load(); - if (!ASSERT_OK_PTR(skel, "open and load skeleton")) - goto close; - tap_ifindex = if_nametoindex(TAP_NAME); if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex")) goto close; @@ -290,12 +287,12 @@ void test_xdp_context_tuntap(void) if (!ASSERT_OK(ret, "bpf_tc_hook_create")) goto close; - tc_opts.prog_fd = bpf_program__fd(skel->progs.ing_cls); + tc_opts.prog_fd = bpf_program__fd(tc_prog); ret = bpf_tc_attach(&tc_hook, &tc_opts); if (!ASSERT_OK(ret, "bpf_tc_attach")) goto close; - ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(skel->progs.ing_xdp), + ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL); if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) goto close; @@ -312,11 +309,25 @@ void test_xdp_context_tuntap(void) if (!ASSERT_EQ(ret, sizeof(packet), "write packet")) goto close; - assert_test_result(skel->maps.test_result); + assert_test_result(result_map); close: if (tap_fd >= 0) close(tap_fd); - test_xdp_meta__destroy(skel); netns_free(ns); } + +void test_xdp_context_tuntap(void) +{ + struct test_xdp_meta *skel = NULL; + + skel = test_xdp_meta__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open and load skeleton")) + return; + + if (test__start_subtest("data_meta")) + test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls, + skel->maps.test_result); + + test_xdp_meta__destroy(skel); +} -- 2.43.0 Exercise reading from SKB metadata area in two new ways: 1. indirectly, with bpf_dynptr_read(), and 2. directly, with bpf_dynptr_slice(). 
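In a nutshell, the two access methods differ as below (a sketch with error checks omitted; assumes at least 4 bytes of metadata are present):

  struct bpf_dynptr meta;
  __u32 copy, *ref;

  bpf_dynptr_from_skb_meta(ctx, 0, &meta);

  /* 1. Copy bytes out of the metadata area into a local buffer */
  bpf_dynptr_read(&copy, sizeof(copy), &meta, 0, 0);

  /* 2. Borrow a pointer into the metadata area itself. Metadata is
   * always linear, so no bounce buffer is needed.
   */
  ref = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*ref));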
Signed-off-by: Jakub Sitnicki --- tools/testing/selftests/bpf/bpf_kfuncs.h | 3 ++ .../bpf/prog_tests/xdp_context_test_run.c | 21 +++++++++++ tools/testing/selftests/bpf/progs/test_xdp_meta.c | 42 ++++++++++++++++++++++ 3 files changed, 66 insertions(+) diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 9386dfe8b884..794d44d19c88 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -19,6 +19,9 @@ extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags, struct bpf_dynptr *ptr__uninit) __ksym __weak; +extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags, + struct bpf_dynptr *ptr__uninit) __ksym __weak; + /* Description * Obtain a read-only pointer to the dynptr's data * Returns diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 6c66e27e5bc7..7e4526461a4c 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -171,6 +171,18 @@ static void assert_test_result(const struct bpf_map *result_map) "test_result map contains test payload"); } +static bool clear_test_result(struct bpf_map *result_map) +{ + const __u8 v[sizeof(test_payload)] = {}; + const __u32 k = 0; + int err; + + err = bpf_map__update_elem(result_map, &k, sizeof(k), v, sizeof(v), BPF_ANY); + ASSERT_OK(err, "update test_result"); + + return err == 0; +} + void test_xdp_context_veth(void) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); @@ -268,6 +280,9 @@ static void test_tuntap(struct bpf_program *xdp_prog, int tap_ifindex; int ret; + if (!clear_test_result(result_map)) + return; + ns = netns_new(TAP_NETNS, true); if (!ASSERT_OK_PTR(ns, "create and open ns")) return; @@ -328,6 +343,12 @@ void test_xdp_context_tuntap(void) if (test__start_subtest("data_meta")) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls, skel->maps.test_result); + if (test__start_subtest("dynptr_read")) + test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_read, + skel->maps.test_result); + if (test__start_subtest("dynptr_slice")) + test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_slice, + skel->maps.test_result); test_xdp_meta__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index fcf6ca14f2ea..0ba647fb1b1d 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -1,8 +1,10 @@ +#include <stdbool.h> #include <linux/bpf.h> #include <linux/if_ether.h> #include <linux/pkt_cls.h> #include <bpf/bpf_helpers.h> +#include "bpf_kfuncs.h" #define META_SIZE 32 @@ -40,6 +42,46 @@ int ing_cls(struct __sk_buff *ctx) return TC_ACT_SHOT; } +/* Read from metadata using bpf_dynptr_read helper */ +SEC("tc") +int ing_cls_dynptr_read(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + const __u32 zero = 0; + __u8 *dst; + + dst = bpf_map_lookup_elem(&test_result, &zero); + if (!dst) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_read(dst, META_SIZE, &meta, 0, 0); + + return TC_ACT_SHOT; +} + +/* Read from metadata using read-only dynptr slice */ +SEC("tc") +int ing_cls_dynptr_slice(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + const __u32 zero = 0; + __u8 *dst, *src; + + dst = bpf_map_lookup_elem(&test_result, &zero); + if (!dst) + return TC_ACT_SHOT; + 
bpf_dynptr_from_skb_meta(ctx, 0, &meta); + src = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE); + if (!src) + return TC_ACT_SHOT; + + __builtin_memcpy(dst, src, META_SIZE); + + return TC_ACT_SHOT; +} + SEC("xdp") int ing_xdp(struct xdp_md *ctx) { -- 2.43.0 Add tests that exercise writes to skb metadata in two ways: 1. indirectly, using the bpf_dynptr_write helper, 2. directly, using a read-write dynptr slice. Signed-off-by: Jakub Sitnicki --- .../bpf/prog_tests/xdp_context_test_run.c | 36 ++++++++++-- tools/testing/selftests/bpf/progs/test_xdp_meta.c | 67 ++++++++++++++++++++++ 2 files changed, 98 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 7e4526461a4c..79c4c58276e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -269,7 +269,8 @@ void test_xdp_context_veth(void) } static void test_tuntap(struct bpf_program *xdp_prog, - struct bpf_program *tc_prog, + struct bpf_program *tc_prio_1_prog, + struct bpf_program *tc_prio_2_prog, struct bpf_map *result_map) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); @@ -302,11 +303,20 @@ static void test_tuntap(struct bpf_program *xdp_prog, if (!ASSERT_OK(ret, "bpf_tc_hook_create")) goto close; - tc_opts.prog_fd = bpf_program__fd(tc_prog); + tc_opts.prog_fd = bpf_program__fd(tc_prio_1_prog); ret = bpf_tc_attach(&tc_hook, &tc_opts); if (!ASSERT_OK(ret, "bpf_tc_attach")) goto close; + if (tc_prio_2_prog) { + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 2, + .prog_fd = bpf_program__fd(tc_prio_2_prog)); + + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + } + ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL); if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) @@ -341,13 +351,29 @@ void test_xdp_context_tuntap(void) return; if (test__start_subtest("data_meta")) - test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls, + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls, + NULL, /* tc prio 2 */ skel->maps.test_result); if (test__start_subtest("dynptr_read")) - test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_read, + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls_dynptr_read, + NULL, /* tc prio 2 */ skel->maps.test_result); if (test__start_subtest("dynptr_slice")) - test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_slice, + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls_dynptr_slice, + NULL, /* tc prio 2 */ + skel->maps.test_result); + if (test__start_subtest("dynptr_write")) + test_tuntap(skel->progs.ing_xdp_zalloc_meta, + skel->progs.ing_cls_dynptr_write, + skel->progs.ing_cls_dynptr_read, + skel->maps.test_result); + if (test__start_subtest("dynptr_slice_rdwr")) + test_tuntap(skel->progs.ing_xdp_zalloc_meta, + skel->progs.ing_cls_dynptr_slice_rdwr, + skel->progs.ing_cls_dynptr_slice, skel->maps.test_result); test_xdp_meta__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index 0ba647fb1b1d..e7879860f403 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -60,6 +60,24 @@ int ing_cls_dynptr_read(struct __sk_buff *ctx) return TC_ACT_SHOT; } +/* Write to metadata using bpf_dynptr_write helper */ +SEC("tc") +int ing_cls_dynptr_write(struct __sk_buff *ctx) +{ + struct 
bpf_dynptr data, meta; + __u8 *src; + + bpf_dynptr_from_skb(ctx, 0, &data); + src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE); + if (!src) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_write(&meta, 0, src, META_SIZE, 0); + + return TC_ACT_UNSPEC; /* pass */ +} + /* Read from metadata using read-only dynptr slice */ SEC("tc") int ing_cls_dynptr_slice(struct __sk_buff *ctx) @@ -82,6 +100,55 @@ int ing_cls_dynptr_slice(struct __sk_buff *ctx) return TC_ACT_SHOT; } +/* Write to metadata using writeable dynptr slice */ +SEC("tc") +int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + __u8 *src, *dst; + + bpf_dynptr_from_skb(ctx, 0, &data); + src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE); + if (!src) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + dst = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE); + if (!dst) + return TC_ACT_SHOT; + + __builtin_memcpy(dst, src, META_SIZE); + + return TC_ACT_UNSPEC; /* pass */ +} + +/* Reserve and clear space for metadata but don't populate it */ +SEC("xdp") +int ing_xdp_zalloc_meta(struct xdp_md *ctx) +{ + struct ethhdr *eth = ctx_ptr(ctx, data); + __u8 *meta; + int ret; + + /* Drop any non-test packets */ + if (eth + 1 > ctx_ptr(ctx, data_end)) + return XDP_DROP; + if (eth->h_proto != 0) + return XDP_DROP; + + ret = bpf_xdp_adjust_meta(ctx, -META_SIZE); + if (ret < 0) + return XDP_DROP; + + meta = ctx_ptr(ctx, data_meta); + if (meta + META_SIZE > ctx_ptr(ctx, data)) + return XDP_DROP; + + __builtin_memset(meta, 0, META_SIZE); + + return XDP_PASS; +} + SEC("xdp") int ing_xdp(struct xdp_md *ctx) { -- 2.43.0 Exercise r/w access to skb metadata through an offset-adjusted dynptr, read/write helper with an offset argument, and a slice starting at an offset. Signed-off-by: Jakub Sitnicki --- .../bpf/prog_tests/xdp_context_test_run.c | 5 ++ tools/testing/selftests/bpf/progs/test_xdp_meta.c | 73 ++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 79c4c58276e6..602fa69afecb 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -375,6 +375,11 @@ void test_xdp_context_tuntap(void) skel->progs.ing_cls_dynptr_slice_rdwr, skel->progs.ing_cls_dynptr_slice, skel->maps.test_result); + if (test__start_subtest("dynptr_offset")) + test_tuntap(skel->progs.ing_xdp_zalloc_meta, + skel->progs.ing_cls_dynptr_offset_wr, + skel->progs.ing_cls_dynptr_offset_rd, + skel->maps.test_result); test_xdp_meta__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index e7879860f403..8f61aa997f74 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -122,6 +122,79 @@ int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx) return TC_ACT_UNSPEC; /* pass */ } +/* + * Read skb metadata in chunks from various offsets in different ways. + */ +SEC("tc") +int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + const __u32 chunk_len = META_SIZE / 4; + const __u32 zero = 0; + __u8 *dst, *src; + + dst = bpf_map_lookup_elem(&test_result, &zero); + if (!dst) + return TC_ACT_SHOT; + + /* 1. 
Regular read */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_read(dst, chunk_len, &meta, 0, 0); + dst += chunk_len; + + /* 2. Read from an offset-adjusted dynptr */ + bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta)); + bpf_dynptr_read(dst, chunk_len, &meta, 0, 0); + dst += chunk_len; + + /* 3. Read at an offset */ + bpf_dynptr_read(dst, chunk_len, &meta, chunk_len, 0); + dst += chunk_len; + + /* 4. Read from a slice starting at an offset */ + src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len); + if (!src) + return TC_ACT_SHOT; + __builtin_memcpy(dst, src, chunk_len); + + return TC_ACT_SHOT; +} + +/* Write skb metadata in chunks at various offsets in different ways. */ +SEC("tc") +int ing_cls_dynptr_offset_wr(struct __sk_buff *ctx) +{ + const __u32 chunk_len = META_SIZE / 4; + __u8 payload[META_SIZE]; + struct bpf_dynptr meta; + __u8 *dst, *src; + + bpf_skb_load_bytes(ctx, sizeof(struct ethhdr), payload, sizeof(payload)); + src = payload; + + /* 1. Regular write */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_write(&meta, 0, src, chunk_len, 0); + src += chunk_len; + + /* 2. Write to an offset-adjusted dynptr */ + bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta)); + bpf_dynptr_write(&meta, 0, src, chunk_len, 0); + src += chunk_len; + + /* 3. Write at an offset */ + bpf_dynptr_write(&meta, chunk_len, src, chunk_len, 0); + src += chunk_len; + + /* 4. Write to a slice starting at an offset */ + dst = bpf_dynptr_slice_rdwr(&meta, 2 * chunk_len, NULL, chunk_len); + if (!dst) + return TC_ACT_SHOT; + __builtin_memcpy(dst, src, chunk_len); + + return TC_ACT_UNSPEC; /* pass */ +} + /* Reserve and clear space for metadata but don't populate it */ SEC("xdp") int ing_xdp_zalloc_meta(struct xdp_md *ctx) -- 2.43.0 Currently we don't expect skb metadata to persist beyond the device hooks. Extend the test to run a BPF program from the Netfilter pre-routing hook to verify this behavior. Note that the added test has no observable side-effect yet. This will be addressed by the next change. 
Signed-off-by: Jakub Sitnicki --- .../bpf/prog_tests/xdp_context_test_run.c | 95 ++++++++++++++++------ tools/testing/selftests/bpf/progs/test_xdp_meta.c | 64 ++++++++++----- tools/testing/selftests/bpf/test_progs.h | 1 + 3 files changed, 117 insertions(+), 43 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 602fa69afecb..97e1dae9a2e7 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -19,6 +19,9 @@ static const __u8 test_payload[TEST_PAYLOAD_LEN] = { 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, }; +#define PACKET_LEN \ + (sizeof(struct ethhdr) + sizeof(struct iphdr) + TEST_PAYLOAD_LEN) + void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts, __u32 data_meta, __u32 data, __u32 data_end, __u32 ingress_ifindex, __u32 rx_queue_index, @@ -120,18 +123,38 @@ void test_xdp_context_test_run(void) test_xdp_context_test_run__destroy(skel); } +static void init_test_packet(__u8 *pkt) +{ + struct ethhdr *eth = &(struct ethhdr){ + .h_dest = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }, + .h_source = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 }, + .h_proto = htons(ETH_P_IP), + }; + struct iphdr *iph = &(struct iphdr){ + .ihl = 5, + .version = IPVERSION, + .ttl = IPDEFTTL, + .protocol = 61, /* host internal protocol */ + .saddr = inet_addr("10.0.0.2"), + .daddr = inet_addr("10.0.0.1"), + }; + + eth = memcpy(pkt, eth, sizeof(*eth)); + pkt += sizeof(*eth); + iph = memcpy(pkt, iph, sizeof(*iph)); + pkt += sizeof(*iph); + memcpy(pkt, test_payload, sizeof(test_payload)); + + iph->tot_len = htons(sizeof(*iph) + sizeof(test_payload)); + iph->check = build_ip_csum(iph); +} + static int send_test_packet(int ifindex) { + __u8 packet[PACKET_LEN]; int n, sock = -1; - __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; - - /* The ethernet header is not relevant for this test and doesn't need to - * be meaningful. 
- */ - struct ethhdr eth = { 0 }; - memcpy(packet, &eth, sizeof(eth)); - memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN); + init_test_packet(packet); sock = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW); if (!ASSERT_GE(sock, 0, "socket")) @@ -271,17 +294,18 @@ void test_xdp_context_veth(void) static void test_tuntap(struct bpf_program *xdp_prog, struct bpf_program *tc_prio_1_prog, struct bpf_program *tc_prio_2_prog, + struct bpf_program *nf_prog, struct bpf_map *result_map) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); - LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + struct bpf_link *nf_link = NULL; struct netns_obj *ns = NULL; - __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; + __u8 packet[PACKET_LEN]; int tap_fd = -1; int tap_ifindex; int ret; - if (!clear_test_result(result_map)) + if (result_map && !clear_test_result(result_map)) return; ns = netns_new(TAP_NETNS, true); @@ -292,6 +316,8 @@ static void test_tuntap(struct bpf_program *xdp_prog, if (!ASSERT_GE(tap_fd, 0, "open_tuntap")) goto close; + SYS(close, "ip link set dev " TAP_NAME " addr 02:00:00:00:00:01"); + SYS(close, "ip addr add dev " TAP_NAME " 10.0.0.1/24"); SYS(close, "ip link set dev " TAP_NAME " up"); tap_ifindex = if_nametoindex(TAP_NAME); @@ -303,10 +329,14 @@ static void test_tuntap(struct bpf_program *xdp_prog, if (!ASSERT_OK(ret, "bpf_tc_hook_create")) goto close; - tc_opts.prog_fd = bpf_program__fd(tc_prio_1_prog); - ret = bpf_tc_attach(&tc_hook, &tc_opts); - if (!ASSERT_OK(ret, "bpf_tc_attach")) - goto close; + if (tc_prio_1_prog) { + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1, + .prog_fd = bpf_program__fd(tc_prio_1_prog)); + + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + } if (tc_prio_2_prog) { LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 2, @@ -317,28 +347,33 @@ static void test_tuntap(struct bpf_program *xdp_prog, goto close; } + if (nf_prog) { + LIBBPF_OPTS(bpf_netfilter_opts, nf_opts, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING); + + nf_link = bpf_program__attach_netfilter(nf_prog, &nf_opts); + if (!ASSERT_OK_PTR(nf_link, "attach_netfilter")) + goto close; + } + ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL); if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) goto close; - /* The ethernet header is not relevant for this test and doesn't need to * be meaningful. 
- */ - struct ethhdr eth = { 0 }; - - memcpy(packet, &eth, sizeof(eth)); - memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN); + init_test_packet(packet); ret = write(tap_fd, packet, sizeof(packet)); if (!ASSERT_EQ(ret, sizeof(packet), "write packet")) goto close; - assert_test_result(result_map); + if (result_map) + assert_test_result(result_map); close: if (tap_fd >= 0) close(tap_fd); + if (nf_link) + bpf_link__destroy(nf_link); netns_free(ns); } @@ -354,32 +389,44 @@ void test_xdp_context_tuntap(void) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls, NULL, /* tc prio 2 */ + NULL, /* netfilter */ skel->maps.test_result); if (test__start_subtest("dynptr_read")) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_read, NULL, /* tc prio 2 */ + NULL, /* netfilter */ skel->maps.test_result); if (test__start_subtest("dynptr_slice")) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_slice, NULL, /* tc prio 2 */ + NULL, /* netfilter */ skel->maps.test_result); if (test__start_subtest("dynptr_write")) test_tuntap(skel->progs.ing_xdp_zalloc_meta, skel->progs.ing_cls_dynptr_write, skel->progs.ing_cls_dynptr_read, + NULL, /* netfilter */ skel->maps.test_result); if (test__start_subtest("dynptr_slice_rdwr")) test_tuntap(skel->progs.ing_xdp_zalloc_meta, skel->progs.ing_cls_dynptr_slice_rdwr, skel->progs.ing_cls_dynptr_slice, + NULL, /* netfilter */ skel->maps.test_result); if (test__start_subtest("dynptr_offset")) test_tuntap(skel->progs.ing_xdp_zalloc_meta, skel->progs.ing_cls_dynptr_offset_wr, skel->progs.ing_cls_dynptr_offset_rd, + NULL, /* netfilter */ skel->maps.test_result); + if (test__start_subtest("dynptr_nf_hook")) + test_tuntap(skel->progs.ing_xdp, + NULL, /* tc prio 1 */ + NULL, /* tc prio 2 */ + skel->progs.ing_nf, + NULL /* ignore result for now */); test_xdp_meta__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index 8f61aa997f74..a42a6dd4343c 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -1,15 +1,25 @@ #include <stdbool.h> #include <linux/bpf.h> #include <linux/if_ether.h> +#include <linux/ip.h> #include <linux/pkt_cls.h> +#include <bpf/bpf_endian.h> #include <bpf/bpf_helpers.h> #include "bpf_kfuncs.h" +#define META_OFFSET (sizeof(struct ethhdr) + sizeof(struct iphdr)) #define META_SIZE 32 +#define NF_DROP 0 +#define NF_ACCEPT 1 #define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem +struct bpf_nf_ctx { + struct sk_buff *skb; +} __attribute__((preserve_access_index)); + /* Demonstrates how metadata can be passed from an XDP program to a TC program * using bpf_xdp_adjust_meta. 
* For the sake of testing the metadata support in drivers, the XDP program uses @@ -60,6 +70,20 @@ int ing_cls_dynptr_read(struct __sk_buff *ctx) return TC_ACT_SHOT; } +/* Check that we can't get a dynptr slice to skb metadata yet */ +SEC("netfilter") +int ing_nf(struct bpf_nf_ctx *ctx) +{ + struct __sk_buff *skb = (struct __sk_buff *)ctx->skb; + struct bpf_dynptr meta; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + if (bpf_dynptr_size(&meta) != 0) + return NF_DROP; + + return NF_ACCEPT; +} + /* Write to metadata using bpf_dynptr_write helper */ SEC("tc") int ing_cls_dynptr_write(struct __sk_buff *ctx) @@ -68,7 +92,7 @@ int ing_cls_dynptr_write(struct __sk_buff *ctx) __u8 *src; bpf_dynptr_from_skb(ctx, 0, &data); - src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE); + src = bpf_dynptr_slice(&data, META_OFFSET, NULL, META_SIZE); if (!src) return TC_ACT_SHOT; @@ -108,7 +132,7 @@ int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx) __u8 *src, *dst; bpf_dynptr_from_skb(ctx, 0, &data); - src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE); + src = bpf_dynptr_slice(&data, META_OFFSET, NULL, META_SIZE); if (!src) return TC_ACT_SHOT; @@ -169,7 +193,7 @@ int ing_cls_dynptr_offset_wr(struct __sk_buff *ctx) struct bpf_dynptr meta; __u8 *dst, *src; - bpf_skb_load_bytes(ctx, sizeof(struct ethhdr), payload, sizeof(payload)); + bpf_skb_load_bytes(ctx, META_OFFSET, payload, sizeof(payload)); src = payload; /* 1. Regular write */ @@ -199,14 +223,18 @@ int ing_cls_dynptr_offset_wr(struct __sk_buff *ctx) SEC("xdp") int ing_xdp_zalloc_meta(struct xdp_md *ctx) { - struct ethhdr *eth = ctx_ptr(ctx, data); + const void *data_end = ctx_ptr(ctx, data_end); + const struct ethhdr *eth; + const struct iphdr *iph; __u8 *meta; int ret; - /* Drop any non-test packets */ - if (eth + 1 > ctx_ptr(ctx, data_end)) + /* Expect Eth | IPv4 (proto=61) | ... */ + eth = ctx_ptr(ctx, data); + if (eth + 1 > data_end || eth->h_proto != bpf_htons(ETH_P_IP)) return XDP_DROP; - if (eth->h_proto != 0) + iph = (void *)(eth + 1); + if (iph + 1 > data_end || iph->protocol != 61) return XDP_DROP; ret = bpf_xdp_adjust_meta(ctx, -META_SIZE); @@ -226,7 +254,8 @@ SEC("xdp") int ing_xdp(struct xdp_md *ctx) { __u8 *data, *data_meta, *data_end, *payload; - struct ethhdr *eth; + const struct ethhdr *eth; + const struct iphdr *iph; int ret; ret = bpf_xdp_adjust_meta(ctx, -META_SIZE); @@ -237,18 +266,15 @@ int ing_xdp(struct xdp_md *ctx) data_end = ctx_ptr(ctx, data_end); data = ctx_ptr(ctx, data); - eth = (struct ethhdr *)data; - payload = data + sizeof(struct ethhdr); - - if (payload + META_SIZE > data_end || - data_meta + META_SIZE > data) + /* Expect Eth | IPv4 (proto=61) | meta blob */ + eth = (void *)data; + if (eth + 1 > data_end || eth->h_proto != bpf_htons(ETH_P_IP)) return XDP_DROP; - - /* The Linux networking stack may send other packets on the test - * interface that interfere with the test. Just drop them. - * The test packets can be recognized by their ethertype of zero. 
- */
-	if (eth->h_proto != 0)
+	iph = (void *)(eth + 1);
+	if (iph + 1 > data_end || iph->protocol != 61)
+		return XDP_DROP;
+	payload = (void *)(iph + 1);
+	if (payload + META_SIZE > data_end || data_meta + META_SIZE > data)
 		return XDP_DROP;
 
 	__builtin_memcpy(data_meta, payload, META_SIZE);
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index df2222a1806f..204f54cdaab1 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -20,6 +20,7 @@ typedef __u16 __sum16;
 #include <arpa/inet.h>
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
+#include <linux/netfilter.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <netinet/tcp.h>
-- 
2.43.0

The skb metadata tests for BPF programs which don't yet have metadata
access produce no observable side effects, so we can't detect breakage.

Count each successful BPF program run, where the program takes the
expected path, as a side effect to test for.

Signed-off-by: Jakub Sitnicki
---
 .../bpf/prog_tests/xdp_context_test_run.c          | 20 +++++++++++++++-
 tools/testing/selftests/bpf/progs/test_xdp_meta.c  | 28 ++++++++++++++--------
 2 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
index 97e1dae9a2e7..a28bf67b6d8d 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -294,13 +294,15 @@ void test_xdp_context_veth(void)
 static void test_tuntap(struct bpf_program *xdp_prog,
 			struct bpf_program *tc_prio_1_prog,
 			struct bpf_program *tc_prio_2_prog,
-			struct bpf_program *nf_prog,
+			struct bpf_program *nf_prog, __u32 *prog_run_cnt,
 			struct bpf_map *result_map)
 {
 	LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
 	struct bpf_link *nf_link = NULL;
 	struct netns_obj *ns = NULL;
 	__u8 packet[PACKET_LEN];
+	__u32 want_run_cnt = 0;
+	__u32 have_run_cnt;
 	int tap_fd = -1;
 	int tap_ifindex;
 	int ret;
@@ -336,6 +338,7 @@ static void test_tuntap(struct bpf_program *xdp_prog,
 		ret = bpf_tc_attach(&tc_hook, &tc_opts);
 		if (!ASSERT_OK(ret, "bpf_tc_attach"))
 			goto close;
+		want_run_cnt++;
 	}
 
 	if (tc_prio_2_prog) {
@@ -345,6 +348,7 @@ static void test_tuntap(struct bpf_program *xdp_prog,
 		ret = bpf_tc_attach(&tc_hook, &tc_opts);
 		if (!ASSERT_OK(ret, "bpf_tc_attach"))
 			goto close;
+		want_run_cnt++;
 	}
 
 	if (nf_prog) {
@@ -354,18 +358,25 @@ static void test_tuntap(struct bpf_program *xdp_prog,
 		nf_link = bpf_program__attach_netfilter(nf_prog, &nf_opts);
 		if (!ASSERT_OK_PTR(nf_link, "attach_netfilter"))
 			goto close;
+		want_run_cnt++;
 	}
 
 	ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL);
 	if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
 		goto close;
+	want_run_cnt++;
 
 	init_test_packet(packet);
 	ret = write(tap_fd, packet, sizeof(packet));
 	if (!ASSERT_EQ(ret, sizeof(packet), "write packet"))
 		goto close;
 
+	have_run_cnt = __atomic_exchange_n(prog_run_cnt, 0, __ATOMIC_SEQ_CST);
+	if (!ASSERT_EQ(have_run_cnt, want_run_cnt,
+		       "unexpected bpf prog runs"))
+		goto close;
+
 	if (result_map)
 		assert_test_result(result_map);
 
@@ -390,42 +401,49 @@ void test_xdp_context_tuntap(void)
 			    skel->progs.ing_cls,
 			    NULL, /* tc prio 2 */
 			    NULL, /* netfilter */
+			    &skel->bss->prog_run_cnt,
 			    skel->maps.test_result);
 	if (test__start_subtest("dynptr_read"))
 		test_tuntap(skel->progs.ing_xdp,
 			    skel->progs.ing_cls_dynptr_read,
 			    NULL, /* tc prio 2 */
 			    NULL, /* netfilter */
+			    &skel->bss->prog_run_cnt,
 			    skel->maps.test_result);
 	if (test__start_subtest("dynptr_slice"))
 		test_tuntap(skel->progs.ing_xdp,
 			    skel->progs.ing_cls_dynptr_slice,
 			    NULL, /* tc prio 2 */
 			    NULL, /* netfilter */
+			    &skel->bss->prog_run_cnt,
 			    skel->maps.test_result);
 	if (test__start_subtest("dynptr_write"))
 		test_tuntap(skel->progs.ing_xdp_zalloc_meta,
 			    skel->progs.ing_cls_dynptr_write,
 			    skel->progs.ing_cls_dynptr_read,
 			    NULL, /* netfilter */
+			    &skel->bss->prog_run_cnt,
 			    skel->maps.test_result);
 	if (test__start_subtest("dynptr_slice_rdwr"))
 		test_tuntap(skel->progs.ing_xdp_zalloc_meta,
 			    skel->progs.ing_cls_dynptr_slice_rdwr,
 			    skel->progs.ing_cls_dynptr_slice,
 			    NULL, /* netfilter */
+			    &skel->bss->prog_run_cnt,
 			    skel->maps.test_result);
 	if (test__start_subtest("dynptr_offset"))
 		test_tuntap(skel->progs.ing_xdp_zalloc_meta,
 			    skel->progs.ing_cls_dynptr_offset_wr,
 			    skel->progs.ing_cls_dynptr_offset_rd,
 			    NULL, /* netfilter */
+			    &skel->bss->prog_run_cnt,
 			    skel->maps.test_result);
 	if (test__start_subtest("dynptr_nf_hook"))
 		test_tuntap(skel->progs.ing_xdp,
 			    NULL, /* tc prio 1 */
 			    NULL, /* tc prio 2 */
 			    skel->progs.ing_nf,
+			    &skel->bss->prog_run_cnt,
 			    NULL /* ignore result for now */);
 
 	test_xdp_meta__destroy(skel);
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
index a42a6dd4343c..3731baf37e06 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
@@ -35,6 +35,14 @@ struct {
 	__uint(value_size, META_SIZE);
 } test_result SEC(".maps");
 
+__u32 prog_run_cnt = 0;
+
+static __always_inline int run_ok(int retval)
+{
+	__sync_fetch_and_add(&prog_run_cnt, 1);
+	return retval;
+}
+
 SEC("tc")
 int ing_cls(struct __sk_buff *ctx)
 {
@@ -49,7 +57,7 @@ int ing_cls(struct __sk_buff *ctx)
 
 	bpf_map_update_elem(&test_result, &key, data_meta, BPF_ANY);
 
-	return TC_ACT_SHOT;
+	return run_ok(TC_ACT_SHOT);
 }
 
 /* Read from metadata using bpf_dynptr_read helper */
@@ -67,7 +75,7 @@ int ing_cls_dynptr_read(struct __sk_buff *ctx)
 	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
 	bpf_dynptr_read(dst, META_SIZE, &meta, 0, 0);
 
-	return TC_ACT_SHOT;
+	return run_ok(TC_ACT_SHOT);
 }
 
 /* Check that we can't get a dynptr slice to skb metadata yet */
@@ -81,7 +89,7 @@ int ing_nf(struct bpf_nf_ctx *ctx)
 	if (bpf_dynptr_size(&meta) != 0)
 		return NF_DROP;
 
-	return NF_ACCEPT;
+	return run_ok(NF_ACCEPT);
 }
 
 /* Write to metadata using bpf_dynptr_write helper */
@@ -99,7 +107,7 @@ int ing_cls_dynptr_write(struct __sk_buff *ctx)
 	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
 	bpf_dynptr_write(&meta, 0, src, META_SIZE, 0);
 
-	return TC_ACT_UNSPEC; /* pass */
+	return run_ok(TC_ACT_UNSPEC); /* pass */
 }
 
 /* Read from metadata using read-only dynptr slice */
@@ -121,7 +129,7 @@ int ing_cls_dynptr_slice(struct __sk_buff *ctx)
 
 	__builtin_memcpy(dst, src, META_SIZE);
 
-	return TC_ACT_SHOT;
+	return run_ok(TC_ACT_SHOT);
 }
 
 /* Write to metadata using writeable dynptr slice */
@@ -143,7 +151,7 @@ int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx)
 
 	__builtin_memcpy(dst, src, META_SIZE);
 
-	return TC_ACT_UNSPEC; /* pass */
+	return run_ok(TC_ACT_UNSPEC); /* pass */
 }
 
 /*
@@ -181,7 +189,7 @@ int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx)
 		return TC_ACT_SHOT;
 	__builtin_memcpy(dst, src, chunk_len);
 
-	return TC_ACT_SHOT;
+	return run_ok(TC_ACT_SHOT);
 }
 
 /* Write skb metadata in chunks at various offsets in different ways.
  */
@@ -216,7 +224,7 @@ int ing_cls_dynptr_offset_wr(struct __sk_buff *ctx)
 		return TC_ACT_SHOT;
 	__builtin_memcpy(dst, src, chunk_len);
 
-	return TC_ACT_UNSPEC; /* pass */
+	return run_ok(TC_ACT_UNSPEC); /* pass */
 }
 
 /* Reserve and clear space for metadata but don't populate it */
@@ -247,7 +255,7 @@ int ing_xdp_zalloc_meta(struct xdp_md *ctx)
 
 	__builtin_memset(meta, 0, META_SIZE);
 
-	return XDP_PASS;
+	return run_ok(XDP_PASS);
 }
 
 SEC("xdp")
@@ -278,7 +286,7 @@ int ing_xdp(struct xdp_md *ctx)
 		return XDP_DROP;
 
 	__builtin_memcpy(data_meta, payload, META_SIZE);
-	return XDP_PASS;
+	return run_ok(XDP_PASS);
 }
 
 char _license[] SEC("license") = "GPL";
-- 
2.43.0
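
For readers who want to try the access pattern outside the selftest
harness, below is a minimal sketch (not part of the series) of what the
selftests above converge on: a TC ingress program that copies the
XDP-provided metadata out through the skb_meta dynptr and counts
successful runs as an observable side effect. The program and symbol
names (meta_reader, run_cnt, buf) are illustrative only, and the
bpf_dynptr_from_skb_meta() and bpf_dynptr_size() kfunc declarations are
assumed to come from the selftests' bpf_kfuncs.h header.

/* Illustrative sketch: read the custom metadata area through the
 * skb_meta dynptr from a TC program. Assumes the kfunc declarations
 * from tools/testing/selftests/bpf/bpf_kfuncs.h.
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include "bpf_kfuncs.h"

#define META_SIZE 32

/* Side-effect counter, same idea as prog_run_cnt in the selftests. */
__u32 run_cnt = 0;

SEC("tc")
int meta_reader(struct __sk_buff *ctx)
{
	struct bpf_dynptr meta;
	__u8 buf[META_SIZE];

	/* A zero-sized dynptr means no metadata is present. */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	if (bpf_dynptr_size(&meta) < META_SIZE)
		return TC_ACT_UNSPEC; /* nothing to read, pass */

	/* Copy the metadata into a stack buffer; fails on OOB range. */
	if (bpf_dynptr_read(buf, META_SIZE, &meta, 0, 0))
		return TC_ACT_SHOT;

	__sync_fetch_and_add(&run_cnt, 1);
	return TC_ACT_UNSPEC; /* pass */
}

char _license[] SEC("license") = "GPL";

Attached at tc ingress (for instance with libbpf's bpf_tc_attach()),
run_cnt can then be read back through the skeleton's .bss, which is
exactly how the selftests detect that a program both ran and took the
expected path.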