It is to unify map flags checking for lookup_elem, update_elem, lookup_batch and update_batch APIs. Therefore, it will be convenient to check BPF_F_CPU and BPF_F_ALL_CPUS flags in it for these APIs in next patch. Signed-off-by: Leon Hwang --- include/linux/bpf.h | 28 ++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 34 +++++++++++----------------------- 2 files changed, 39 insertions(+), 23 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8f6e87f0f3a89..512717d442c09 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3709,4 +3709,32 @@ int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char * const char **linep, int *nump); struct bpf_prog *bpf_prog_find_from_stack(void); +static inline int bpf_map_check_op_flags(struct bpf_map *map, u64 flags, u64 extra_flags_mask) +{ + if (extra_flags_mask && (flags & extra_flags_mask)) + return -EINVAL; + + if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)) + return -EINVAL; + + return 0; +} + +static inline int bpf_map_check_update_flags(struct bpf_map *map, u64 flags) +{ + return bpf_map_check_op_flags(map, flags, 0); +} + +#define BPF_MAP_LOOKUP_ELEM_EXTRA_FLAGS_MASK (~BPF_F_LOCK) + +static inline int bpf_map_check_lookup_flags(struct bpf_map *map, u64 flags) +{ + return bpf_map_check_op_flags(map, flags, BPF_MAP_LOOKUP_ELEM_EXTRA_FLAGS_MASK); +} + +static inline int bpf_map_check_batch_flags(struct bpf_map *map, u64 flags) +{ + return bpf_map_check_op_flags(map, flags, BPF_MAP_LOOKUP_ELEM_EXTRA_FLAGS_MASK); +} + #endif /* _LINUX_BPF_H */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0fbfa8532c392..4e04b35944a2b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1669,9 +1669,6 @@ static int map_lookup_elem(union bpf_attr *attr) if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) return -EINVAL; - if (attr->flags & ~BPF_F_LOCK) - return -EINVAL; - CLASS(fd, f)(attr->map_fd); map = __bpf_map_get(f); if (IS_ERR(map)) @@ -1679,9 +1676,9 @@ static int map_lookup_elem(union bpf_attr *attr) if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) return -EPERM; - if ((attr->flags & BPF_F_LOCK) && - !btf_record_has_field(map->record, BPF_SPIN_LOCK)) - return -EINVAL; + err = bpf_map_check_lookup_flags(map, attr->flags); + if (err) + return err; key = __bpf_copy_key(ukey, map->key_size); if (IS_ERR(key)) @@ -1744,11 +1741,9 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr) goto err_put; } - if ((attr->flags & BPF_F_LOCK) && - !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { - err = -EINVAL; + err = bpf_map_check_update_flags(map, attr->flags); + if (err) goto err_put; - } key = ___bpf_copy_key(ukey, map->key_size); if (IS_ERR(key)) { @@ -1952,13 +1947,9 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file, void *key, *value; int err = 0; - if (attr->batch.elem_flags & ~BPF_F_LOCK) - return -EINVAL; - - if ((attr->batch.elem_flags & BPF_F_LOCK) && - !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { - return -EINVAL; - } + err = bpf_map_check_batch_flags(map, attr->batch.elem_flags); + if (err) + return err; value_size = bpf_map_value_size(map); @@ -2015,12 +2006,9 @@ int generic_map_lookup_batch(struct bpf_map *map, u32 value_size, cp, max_count; int err; - if (attr->batch.elem_flags & ~BPF_F_LOCK) - return -EINVAL; - - if ((attr->batch.elem_flags & BPF_F_LOCK) && - !btf_record_has_field(map->record, BPF_SPIN_LOCK)) - return -EINVAL; + err = bpf_map_check_batch_flags(map, attr->batch.elem_flags); + if (err) + return err; value_size = bpf_map_value_size(map); -- 2.50.1 Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags and the following internal helper functions for percpu maps: * bpf_percpu_copy_to_user: For lookup_elem and lookup_batch user APIs, copy data to user-provided value pointer. * bpf_percpu_copy_from_user: For update_elem and update_batch user APIs, copy data from user-provided value pointer. * bpf_map_check_cpu_flags: Check BPF_F_CPU, BPF_F_ALL_CPUS and cpu info in flags. And, get the correct value size for these user APIs. Signed-off-by: Leon Hwang --- include/linux/bpf.h | 89 ++++++++++++++++++++++++++++++++-- include/uapi/linux/bpf.h | 2 + kernel/bpf/syscall.c | 24 ++++----- tools/include/uapi/linux/bpf.h | 2 + 4 files changed, 103 insertions(+), 14 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 512717d442c09..a83364949b64c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -547,6 +547,56 @@ static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src bpf_obj_memcpy(map->record, dst, src, map->value_size, true); } +#ifdef CONFIG_BPF_SYSCALL +static inline void bpf_percpu_copy_to_user(struct bpf_map *map, void __percpu *pptr, void *value, + u32 size, u64 flags) +{ + int current_cpu = raw_smp_processor_id(); + int cpu, off = 0; + + if (flags & BPF_F_CPU) { + cpu = flags >> 32; + copy_map_value_long(map, value, cpu != current_cpu ? per_cpu_ptr(pptr, cpu) : + this_cpu_ptr(pptr)); + check_and_init_map_value(map, value); + } else { + for_each_possible_cpu(cpu) { + copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu)); + check_and_init_map_value(map, value + off); + off += size; + } + } +} + +void bpf_obj_free_fields(const struct btf_record *rec, void *obj); + +static inline void bpf_percpu_copy_from_user(struct bpf_map *map, void __percpu *pptr, void *value, + u32 size, u64 flags) +{ + int current_cpu = raw_smp_processor_id(); + int cpu, off = 0; + void *ptr; + + if (flags & BPF_F_CPU) { + cpu = flags >> 32; + ptr = cpu == current_cpu ? this_cpu_ptr(pptr) : per_cpu_ptr(pptr, cpu); + copy_map_value_long(map, ptr, value); + bpf_obj_free_fields(map->record, ptr); + } else { + for_each_possible_cpu(cpu) { + copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off); + /* same user-provided value is used if + * BPF_F_ALL_CPUS is specified, otherwise value is + * an array of per-cpu values. + */ + if (!(flags & BPF_F_ALL_CPUS)) + off += size; + bpf_obj_free_fields(map->record, per_cpu_ptr(pptr, cpu)); + } + } +} +#endif + static inline void bpf_obj_swap_uptrs(const struct btf_record *rec, void *dst, void *src) { unsigned long *src_uptr, *dst_uptr; @@ -2417,7 +2467,6 @@ struct btf_record *btf_record_dup(const struct btf_record *rec); bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b); void bpf_obj_free_timer(const struct btf_record *rec, void *obj); void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj); -void bpf_obj_free_fields(const struct btf_record *rec, void *obj); void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); struct bpf_map *bpf_map_get(u32 ufd); @@ -3709,14 +3758,25 @@ int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char * const char **linep, int *nump); struct bpf_prog *bpf_prog_find_from_stack(void); +static inline bool bpf_map_supports_cpu_flags(enum bpf_map_type map_type) +{ + return false; +} + static inline int bpf_map_check_op_flags(struct bpf_map *map, u64 flags, u64 extra_flags_mask) { - if (extra_flags_mask && (flags & extra_flags_mask)) + if (extra_flags_mask && ((u32)flags & extra_flags_mask)) return -EINVAL; if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)) return -EINVAL; + if (!(flags & BPF_F_CPU) && flags >> 32) + return -EINVAL; + + if ((flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) && !bpf_map_supports_cpu_flags(map->map_type)) + return -EINVAL; + return 0; } @@ -3725,7 +3785,7 @@ static inline int bpf_map_check_update_flags(struct bpf_map *map, u64 flags) return bpf_map_check_op_flags(map, flags, 0); } -#define BPF_MAP_LOOKUP_ELEM_EXTRA_FLAGS_MASK (~BPF_F_LOCK) +#define BPF_MAP_LOOKUP_ELEM_EXTRA_FLAGS_MASK (~(BPF_F_LOCK | BPF_F_CPU | BPF_F_ALL_CPUS)) static inline int bpf_map_check_lookup_flags(struct bpf_map *map, u64 flags) { @@ -3737,4 +3797,27 @@ static inline int bpf_map_check_batch_flags(struct bpf_map *map, u64 flags) return bpf_map_check_op_flags(map, flags, BPF_MAP_LOOKUP_ELEM_EXTRA_FLAGS_MASK); } +static inline int bpf_map_check_cpu_flags(u64 flags, bool check_all_cpus_flag) +{ + const u64 cpu_flags = BPF_F_CPU | BPF_F_ALL_CPUS; + u32 cpu; + + if (check_all_cpus_flag) { + if (unlikely((u32)flags > BPF_F_ALL_CPUS)) + /* unknown flags */ + return -EINVAL; + if (unlikely((flags & cpu_flags) == cpu_flags)) + return -EINVAL; + } else { + if (unlikely((u32)flags & ~BPF_F_CPU)) + return -EINVAL; + } + + cpu = flags >> 32; + if (unlikely((flags & BPF_F_CPU) && cpu >= num_possible_cpus())) + return -ERANGE; + + return 0; +} + #endif /* _LINUX_BPF_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 233de8677382e..be1fdc5042744 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1372,6 +1372,8 @@ enum { BPF_NOEXIST = 1, /* create new element if it didn't exist */ BPF_EXIST = 2, /* update existing element */ BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */ + BPF_F_CPU = 8, /* cpu flag for percpu maps, upper 32-bit of flags is a cpu number */ + BPF_F_ALL_CPUS = 16, /* update value across all CPUs for percpu maps */ }; /* flags for BPF_MAP_CREATE command */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4e04b35944a2b..dbd21484d7a4d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -131,12 +131,14 @@ bool bpf_map_write_active(const struct bpf_map *map) return atomic64_read(&map->writecnt) != 0; } -static u32 bpf_map_value_size(const struct bpf_map *map) -{ - if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || - map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || - map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) +static u32 bpf_map_value_size(const struct bpf_map *map, u64 flags) +{ + if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) + return round_up(map->value_size, 8); + else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || + map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) return round_up(map->value_size, 8) * num_possible_cpus(); else if (IS_FD_MAP(map)) return sizeof(u32); @@ -1684,7 +1686,7 @@ static int map_lookup_elem(union bpf_attr *attr) if (IS_ERR(key)) return PTR_ERR(key); - value_size = bpf_map_value_size(map); + value_size = bpf_map_value_size(map, attr->flags); err = -ENOMEM; value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); @@ -1751,7 +1753,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr) goto err_put; } - value_size = bpf_map_value_size(map); + value_size = bpf_map_value_size(map, attr->flags); value = kvmemdup_bpfptr(uvalue, value_size); if (IS_ERR(value)) { err = PTR_ERR(value); @@ -1951,7 +1953,7 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file, if (err) return err; - value_size = bpf_map_value_size(map); + value_size = bpf_map_value_size(map, attr->batch.elem_flags); max_count = attr->batch.count; if (!max_count) @@ -2010,7 +2012,7 @@ int generic_map_lookup_batch(struct bpf_map *map, if (err) return err; - value_size = bpf_map_value_size(map); + value_size = bpf_map_value_size(map, attr->batch.elem_flags); max_count = attr->batch.count; if (!max_count) @@ -2132,7 +2134,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) goto err_put; } - value_size = bpf_map_value_size(map); + value_size = bpf_map_value_size(map, 0); err = -ENOMEM; value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 233de8677382e..be1fdc5042744 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1372,6 +1372,8 @@ enum { BPF_NOEXIST = 1, /* create new element if it didn't exist */ BPF_EXIST = 2, /* update existing element */ BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */ + BPF_F_CPU = 8, /* cpu flag for percpu maps, upper 32-bit of flags is a cpu number */ + BPF_F_ALL_CPUS = 16, /* update value across all CPUs for percpu maps */ }; /* flags for BPF_MAP_CREATE command */ -- 2.50.1 Introduce support for the BPF_F_ALL_CPUS flag in percpu_array maps to allow updating values for all CPUs with a single value for both update_elem and update_batch APIs. Introduce support for the BPF_F_CPU flag in percpu_array maps to allow: * update value for specified CPU for both update_elem and update_batch APIs. * lookup value for specified CPU for both lookup_elem and lookup_batch APIs. The BPF_F_CPU flag is passed via: * map_flags of lookup_elem and update_elem APIs along with embedded cpu info. * elem_flags of lookup_batch and update_batch APIs along with embedded cpu info. Signed-off-by: Leon Hwang --- include/linux/bpf.h | 10 ++++++++-- kernel/bpf/arraymap.c | 28 ++++++++++++---------------- kernel/bpf/syscall.c | 2 +- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a83364949b64c..bc44b72129e59 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2746,7 +2746,8 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env, struct bpf_func_state *callee); int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); -int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); +int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, + u64 flags); int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, u64 flags); int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, @@ -3760,7 +3761,12 @@ struct bpf_prog *bpf_prog_find_from_stack(void); static inline bool bpf_map_supports_cpu_flags(enum bpf_map_type map_type) { - return false; + switch (map_type) { + case BPF_MAP_TYPE_PERCPU_ARRAY: + return true; + default: + return false; + } } static inline int bpf_map_check_op_flags(struct bpf_map *map, u64 flags, u64 extra_flags_mask) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 3d080916faf97..42a75fbc588a1 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -295,17 +295,21 @@ static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu); } -int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) +int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; void __percpu *pptr; - int cpu, off = 0; u32 size; + int err; if (unlikely(index >= array->map.max_entries)) return -ENOENT; + err = bpf_map_check_cpu_flags(flags, false); + if (unlikely(err)) + return err; + /* per_cpu areas are zero-filled and bpf programs can only * access 'value_size' of them, so copying rounded areas * will not leak any kernel data @@ -313,11 +317,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) size = array->elem_size; rcu_read_lock(); pptr = array->pptrs[index & array->index_mask]; - for_each_possible_cpu(cpu) { - copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu)); - check_and_init_map_value(map, value + off); - off += size; - } + bpf_percpu_copy_to_user(map, pptr, value, size, flags); rcu_read_unlock(); return 0; } @@ -387,12 +387,12 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; void __percpu *pptr; - int cpu, off = 0; u32 size; + int err; - if (unlikely(map_flags > BPF_EXIST)) - /* unknown flags */ - return -EINVAL; + err = bpf_map_check_cpu_flags(map_flags, true); + if (unlikely(err)) + return err; if (unlikely(index >= array->map.max_entries)) /* all elements were pre-allocated, cannot insert a new one */ @@ -411,11 +411,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, size = array->elem_size; rcu_read_lock(); pptr = array->pptrs[index & array->index_mask]; - for_each_possible_cpu(cpu) { - copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off); - bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu)); - off += size; - } + bpf_percpu_copy_from_user(map, pptr, value, size, map_flags); rcu_read_unlock(); return 0; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index dbd21484d7a4d..2ee2cf5a8cedf 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -316,7 +316,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { err = bpf_percpu_hash_copy(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { - err = bpf_percpu_array_copy(map, key, value); + err = bpf_percpu_array_copy(map, key, value, flags); } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { err = bpf_percpu_cgroup_storage_copy(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { -- 2.50.1 Introduce BPF_F_ALL_CPUS flag support for percpu_hash and lru_percpu_hash maps to allow updating values for all CPUs with a single value for both update_elem and update_batch APIs. Introduce BPF_F_CPU flag support for percpu_hash and lru_percpu_hash maps to allow: * update value for specified CPU for both update_elem and update_batch APIs. * lookup value for specified CPU for both lookup_elem and lookup_batch APIs. The BPF_F_CPU flag is passed via: * map_flags along with embedded cpu info. * elem_flags along with embedded cpu info. Signed-off-by: Leon Hwang --- include/linux/bpf.h | 4 +- kernel/bpf/hashtab.c | 95 +++++++++++++++++++++++++++----------------- kernel/bpf/syscall.c | 2 +- 3 files changed, 63 insertions(+), 38 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bc44b72129e59..c120b00448a13 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2745,7 +2745,7 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee); -int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); +int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 flags); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags); int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, @@ -3763,6 +3763,8 @@ static inline bool bpf_map_supports_cpu_flags(enum bpf_map_type map_type) { switch (map_type) { case BPF_MAP_TYPE_PERCPU_ARRAY: + case BPF_MAP_TYPE_PERCPU_HASH: + case BPF_MAP_TYPE_LRU_PERCPU_HASH: return true; default: return false; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 71f9931ac64cd..031a74c1b7fd7 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -937,24 +937,39 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) } static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, - void *value, bool onallcpus) + void *value, bool onallcpus, u64 map_flags) { + int cpu = map_flags & BPF_F_CPU ? map_flags >> 32 : 0; + int current_cpu = raw_smp_processor_id(); + if (!onallcpus) { /* copy true value_size bytes */ - copy_map_value(&htab->map, this_cpu_ptr(pptr), value); + copy_map_value(&htab->map, (map_flags & BPF_F_CPU) && cpu != current_cpu ? + per_cpu_ptr(pptr, cpu) : this_cpu_ptr(pptr), value); } else { u32 size = round_up(htab->map.value_size, 8); - int off = 0, cpu; + int off = 0; + + if (map_flags & BPF_F_CPU) { + copy_map_value_long(&htab->map, cpu != current_cpu ? + per_cpu_ptr(pptr, cpu) : this_cpu_ptr(pptr), value); + return; + } for_each_possible_cpu(cpu) { copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off); - off += size; + /* same user-provided value is used if + * BPF_F_ALL_CPUS is specified, otherwise value is + * an array of per-cpu values. + */ + if (!(map_flags & BPF_F_ALL_CPUS)) + off += size; } } } static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr, - void *value, bool onallcpus) + void *value, bool onallcpus, u64 map_flags) { /* When not setting the initial value on all cpus, zero-fill element * values for other cpus. Otherwise, bpf program has no way to ensure @@ -972,7 +987,7 @@ static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr, zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu)); } } else { - pcpu_copy_value(htab, pptr, value, onallcpus); + pcpu_copy_value(htab, pptr, value, onallcpus, map_flags); } } @@ -984,7 +999,7 @@ static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab) static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, void *value, u32 key_size, u32 hash, bool percpu, bool onallcpus, - struct htab_elem *old_elem) + struct htab_elem *old_elem, u64 map_flags) { u32 size = htab->map.value_size; bool prealloc = htab_is_prealloc(htab); @@ -1042,7 +1057,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, pptr = *(void __percpu **)ptr; } - pcpu_init_value(htab, pptr, value, onallcpus); + pcpu_init_value(htab, pptr, value, onallcpus, map_flags); if (!prealloc) htab_elem_set_ptr(l_new, key_size, pptr); @@ -1147,7 +1162,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value, } l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false, - l_old); + l_old, map_flags); if (IS_ERR(l_new)) { /* all pre-allocated elements are in use or memory exhausted */ ret = PTR_ERR(l_new); @@ -1263,9 +1278,15 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key, u32 key_size, hash; int ret; - if (unlikely(map_flags > BPF_EXIST)) - /* unknown flags */ - return -EINVAL; + if (percpu) { + ret = bpf_map_check_cpu_flags(map_flags, true); + if (unlikely(ret)) + return ret; + } else { + if (unlikely(map_flags > BPF_EXIST)) + /* unknown flags */ + return -EINVAL; + } WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && !rcu_read_lock_bh_held()); @@ -1291,7 +1312,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key, /* Update value in-place */ if (percpu) { pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), - value, onallcpus); + value, onallcpus, map_flags); } else { void **inner_map_pptr = htab_elem_value(l_old, key_size); @@ -1300,7 +1321,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key, } } else { l_new = alloc_htab_elem(htab, key, value, key_size, - hash, percpu, onallcpus, NULL); + hash, percpu, onallcpus, NULL, map_flags); if (IS_ERR(l_new)) { ret = PTR_ERR(l_new); goto err; @@ -1326,9 +1347,9 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, u32 key_size, hash; int ret; - if (unlikely(map_flags > BPF_EXIST)) - /* unknown flags */ - return -EINVAL; + ret = bpf_map_check_cpu_flags(map_flags, true); + if (unlikely(ret)) + return ret; WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && !rcu_read_lock_bh_held()); @@ -1366,10 +1387,10 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, /* per-cpu hash map can update value in-place */ pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), - value, onallcpus); + value, onallcpus, map_flags); } else { pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size), - value, onallcpus); + value, onallcpus, map_flags); hlist_nulls_add_head_rcu(&l_new->hash_node, head); l_new = NULL; } @@ -1698,9 +1719,16 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map, int ret = 0; elem_map_flags = attr->batch.elem_flags; - if ((elem_map_flags & ~BPF_F_LOCK) || - ((elem_map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))) - return -EINVAL; + if (!do_delete && is_percpu) { + ret = bpf_map_check_lookup_flags(map, elem_map_flags); + if (ret) + return ret; + } else { + if ((elem_map_flags & ~BPF_F_LOCK) || + ((elem_map_flags & BPF_F_LOCK) && + !btf_record_has_field(map->record, BPF_SPIN_LOCK))) + return -EINVAL; + } map_flags = attr->batch.flags; if (map_flags) @@ -1802,15 +1830,10 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map, memcpy(dst_key, l->key, key_size); if (is_percpu) { - int off = 0, cpu; void __percpu *pptr; pptr = htab_elem_get_ptr(l, map->key_size); - for_each_possible_cpu(cpu) { - copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu)); - check_and_init_map_value(&htab->map, dst_val + off); - off += size; - } + bpf_percpu_copy_to_user(&htab->map, pptr, dst_val, size, elem_map_flags); } else { value = htab_elem_value(l, key_size); if (is_fd_htab(htab)) { @@ -2365,13 +2388,17 @@ static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *k return NULL; } -int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) +int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 map_flags) { struct htab_elem *l; void __percpu *pptr; - int ret = -ENOENT; - int cpu, off = 0; u32 size; + int ret; + + ret = bpf_map_check_cpu_flags(map_flags, false); + if (unlikely(ret)) + return ret; + ret = -ENOENT; /* per_cpu areas are zero-filled and bpf programs can only * access 'value_size' of them, so copying rounded areas @@ -2386,11 +2413,7 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) * eviction heuristics when user space does a map walk. */ pptr = htab_elem_get_ptr(l, map->key_size); - for_each_possible_cpu(cpu) { - copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu)); - check_and_init_map_value(map, value + off); - off += size; - } + bpf_percpu_copy_to_user(map, pptr, value, size, map_flags); ret = 0; out: rcu_read_unlock(); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2ee2cf5a8cedf..91ab9bdd1da38 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -314,7 +314,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, bpf_disable_instrumentation(); if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { - err = bpf_percpu_hash_copy(map, key, value); + err = bpf_percpu_hash_copy(map, key, value, flags); } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { err = bpf_percpu_array_copy(map, key, value, flags); } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { -- 2.50.1 Introduce BPF_F_ALL_CPUS flag support for percpu_cgroup_storage maps to allow updating values for all CPUs with a single value for update_elem API. Introduce BPF_F_CPU flag support for percpu_cgroup_storage maps to allow: * update value for specified CPU for update_elem API. * lookup value for specified CPU for lookup_elem API. The BPF_F_CPU flag is passed via map_flags along with embedded cpu info. Signed-off-by: Leon Hwang --- include/linux/bpf-cgroup.h | 5 +++-- include/linux/bpf.h | 1 + kernel/bpf/local_storage.c | 42 ++++++++++++++++++++------------------ kernel/bpf/syscall.c | 2 +- 4 files changed, 27 insertions(+), 23 deletions(-) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index aedf573bdb426..1cb28660aa866 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -172,7 +172,8 @@ void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage); int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map); -int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value); +int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value, + u64 flags); int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, void *value, u64 flags); @@ -467,7 +468,7 @@ static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( static inline void bpf_cgroup_storage_free( struct bpf_cgroup_storage *storage) {} static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, - void *value) { + void *value, u64 flags) { return 0; } static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c120b00448a13..2d67af3e7f870 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3765,6 +3765,7 @@ static inline bool bpf_map_supports_cpu_flags(enum bpf_map_type map_type) case BPF_MAP_TYPE_PERCPU_ARRAY: case BPF_MAP_TYPE_PERCPU_HASH: case BPF_MAP_TYPE_LRU_PERCPU_HASH: + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: return true; default: return false; diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index c93a756e035c0..31b146cf8f48e 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -180,18 +180,22 @@ static long cgroup_storage_update_elem(struct bpf_map *map, void *key, } int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *key, - void *value) + void *value, u64 map_flags) { struct bpf_cgroup_storage_map *map = map_to_storage(_map); struct bpf_cgroup_storage *storage; - int cpu, off = 0; u32 size; + int err; + + err = bpf_map_check_cpu_flags(map_flags, false); + if (err) + return err; rcu_read_lock(); storage = cgroup_storage_lookup(map, key, false); if (!storage) { - rcu_read_unlock(); - return -ENOENT; + err = -ENOENT; + goto unlock; } /* per_cpu areas are zero-filled and bpf programs can only @@ -199,13 +203,10 @@ int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *key, * will not leak any kernel data */ size = round_up(_map->value_size, 8); - for_each_possible_cpu(cpu) { - bpf_long_memcpy(value + off, - per_cpu_ptr(storage->percpu_buf, cpu), size); - off += size; - } + bpf_percpu_copy_to_user(_map, storage->percpu_buf, value, size, map_flags); +unlock: rcu_read_unlock(); - return 0; + return err; } int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *key, @@ -213,17 +214,21 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *key, { struct bpf_cgroup_storage_map *map = map_to_storage(_map); struct bpf_cgroup_storage *storage; - int cpu, off = 0; u32 size; + int err; - if (map_flags != BPF_ANY && map_flags != BPF_EXIST) + if ((u32)map_flags & ~(BPF_ANY | BPF_EXIST | BPF_F_CPU | BPF_F_ALL_CPUS)) return -EINVAL; + err = bpf_map_check_cpu_flags(map_flags, true); + if (err) + return err; + rcu_read_lock(); storage = cgroup_storage_lookup(map, key, false); if (!storage) { - rcu_read_unlock(); - return -ENOENT; + err = -ENOENT; + goto unlock; } /* the user space will provide round_up(value_size, 8) bytes that @@ -233,13 +238,10 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *key, * so no kernel data leaks possible */ size = round_up(_map->value_size, 8); - for_each_possible_cpu(cpu) { - bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu), - value + off, size); - off += size; - } + bpf_percpu_copy_from_user(_map, storage->percpu_buf, value, size, map_flags); +unlock: rcu_read_unlock(); - return 0; + return err; } static int cgroup_storage_get_next_key(struct bpf_map *_map, void *key, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 91ab9bdd1da38..0c88d7f6f1491 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -318,7 +318,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { err = bpf_percpu_array_copy(map, key, value, flags); } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { - err = bpf_percpu_cgroup_storage_copy(map, key, value); + err = bpf_percpu_cgroup_storage_copy(map, key, value, flags); } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { err = bpf_stackmap_copy(map, key, value); } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) { -- 2.50.1 Add libbpf support for the BPF_F_CPU flag for percpu maps by embedding the cpu info into the high 32 bits of: 1. **flags**: bpf_map_lookup_elem_flags(), bpf_map__lookup_elem(), bpf_map_update_elem() and bpf_map__update_elem() 2. **opts->elem_flags**: bpf_map_lookup_batch() and bpf_map_update_batch() And the flag can be BPF_F_ALL_CPUS, but cannot be 'BPF_F_CPU | BPF_F_ALL_CPUS'. Behavior: * If the flag is BPF_F_ALL_CPUS, the update is applied across all CPUs. * If the flag is BPF_F_CPU, it updates value only to the specified CPU. * If the flag is BPF_F_CPU, lookup value only from the specified CPU. * lookup does not support BPF_F_ALL_CPUS. Signed-off-by: Leon Hwang --- tools/lib/bpf/bpf.h | 8 ++++++++ tools/lib/bpf/libbpf.c | 33 +++++++++++++++++++++++++++------ tools/lib/bpf/libbpf.h | 21 ++++++++------------- 3 files changed, 43 insertions(+), 19 deletions(-) diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 7252150e7ad35..28acb15e982b3 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -286,6 +286,14 @@ LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, * Update spin_lock-ed map elements. This must be * specified if the map value contains a spinlock. * + * **BPF_F_CPU** + * As for percpu maps, update value on the specified CPU. And the cpu + * info is embedded into the high 32 bits of **opts->elem_flags**. + * + * **BPF_F_ALL_CPUS** + * As for percpu maps, update value across all CPUs. This flag cannot + * be used with BPF_F_CPU at the same time. + * * @param fd BPF map file descriptor * @param keys pointer to an array of *count* keys * @param values pointer to an array of *count* values diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index fe4fc5438678c..e6a6341da861a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10603,7 +10603,7 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name) } static int validate_map_op(const struct bpf_map *map, size_t key_sz, - size_t value_sz, bool check_value_sz) + size_t value_sz, bool check_value_sz, __u64 flags) { if (!map_is_created(map)) /* map is not yet created */ return -ENOENT; @@ -10629,6 +10629,27 @@ static int validate_map_op(const struct bpf_map *map, size_t key_sz, case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: { int num_cpu = libbpf_num_possible_cpus(); size_t elem_sz = roundup(map->def.value_size, 8); + __u32 cpu; + + if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) { + if ((flags & BPF_F_CPU) && (flags & BPF_F_ALL_CPUS)) { + pr_warn("map '%s': can't use BPF_F_CPU and BPF_F_ALL_CPUS at the same time\n", + map->name); + return -EINVAL; + } + cpu = flags >> 32; + if (cpu >= num_cpu) { + pr_warn("map '%s': cpu %u in flags cannot be GE num cpus %d\n", + map->name, cpu, num_cpu); + return -ERANGE; + } + if (value_sz != elem_sz) { + pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %zu\n", + map->name, value_sz, elem_sz); + return -EINVAL; + } + break; + } if (value_sz != num_cpu * elem_sz) { pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n", @@ -10654,7 +10675,7 @@ int bpf_map__lookup_elem(const struct bpf_map *map, { int err; - err = validate_map_op(map, key_sz, value_sz, true); + err = validate_map_op(map, key_sz, value_sz, true, flags); if (err) return libbpf_err(err); @@ -10667,7 +10688,7 @@ int bpf_map__update_elem(const struct bpf_map *map, { int err; - err = validate_map_op(map, key_sz, value_sz, true); + err = validate_map_op(map, key_sz, value_sz, true, flags); if (err) return libbpf_err(err); @@ -10679,7 +10700,7 @@ int bpf_map__delete_elem(const struct bpf_map *map, { int err; - err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); + err = validate_map_op(map, key_sz, 0, false /* check_value_sz */, flags); if (err) return libbpf_err(err); @@ -10692,7 +10713,7 @@ int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, { int err; - err = validate_map_op(map, key_sz, value_sz, true); + err = validate_map_op(map, key_sz, value_sz, true, flags); if (err) return libbpf_err(err); @@ -10704,7 +10725,7 @@ int bpf_map__get_next_key(const struct bpf_map *map, { int err; - err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); + err = validate_map_op(map, key_sz, 0, false /* check_value_sz */, 0); if (err) return libbpf_err(err); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 2e91148d9b44d..f221dc5c6ba41 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -1196,12 +1196,13 @@ LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map); * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** * @param value pointer to memory in which looked up value will be stored * @param value_sz size in byte of value data memory; it has to match BPF map - * definition's **value_size**. For per-CPU BPF maps value size has to be - * a product of BPF map value size and number of possible CPUs in the system - * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for - * per-CPU values value size has to be aligned up to closest 8 bytes for - * alignment reasons, so expected size is: `round_up(value_size, 8) - * * libbpf_num_possible_cpus()`. + * definition's **value_size**. For per-CPU BPF maps, value size can be + * `round_up(value_size, 8)` if **BPF_F_CPU** or **BPF_F_ALL_CPUS** is + * specified in **flags**, otherwise a product of BPF map value size and number + * of possible CPUs in the system (could be fetched with + * **libbpf_num_possible_cpus()**). Note else that for per-CPU values value + * size has to be aligned up to closest 8 bytes, so expected size is: + * `round_up(value_size, 8) * libbpf_num_possible_cpus()`. * @flags extra flags passed to kernel for this operation * @return 0, on success; negative error, otherwise * @@ -1219,13 +1220,7 @@ LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map, * @param key pointer to memory containing bytes of the key * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** * @param value pointer to memory containing bytes of the value - * @param value_sz size in byte of value data memory; it has to match BPF map - * definition's **value_size**. For per-CPU BPF maps value size has to be - * a product of BPF map value size and number of possible CPUs in the system - * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for - * per-CPU values value size has to be aligned up to closest 8 bytes for - * alignment reasons, so expected size is: `round_up(value_size, 8) - * * libbpf_num_possible_cpus()`. + * @param value_sz refer to **bpf_map__lookup_elem**'s description.' * @flags extra flags passed to kernel for this operation * @return 0, on success; negative error, otherwise * -- 2.50.1 Add test coverage for the new BPF_F_CPU and BPF_F_ALL_CPUS flags support in percpu maps. The following APIs are exercised: * bpf_map_update_batch() * bpf_map_lookup_batch() * bpf_map_update_elem() * bpf_map__update_elem() * bpf_map_lookup_elem_flags() * bpf_map__lookup_elem() cd tools/testing/selftests/bpf/ ./test_progs -t percpu_alloc 253/13 percpu_alloc/cpu_flag_percpu_array:OK 253/14 percpu_alloc/cpu_flag_percpu_hash:OK 253/15 percpu_alloc/cpu_flag_lru_percpu_hash:OK 253/16 percpu_alloc/cpu_flag_percpu_cgroup_storage:OK 253 percpu_alloc:OK Summary: 1/16 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Leon Hwang --- .../selftests/bpf/prog_tests/percpu_alloc.c | 224 ++++++++++++++++++ .../selftests/bpf/progs/percpu_alloc_array.c | 32 +++ 2 files changed, 256 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c index 343da65864d6d..98b6e8cc7ae60 100644 --- a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c +++ b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include "cgroup_helpers.h" #include "percpu_alloc_array.skel.h" #include "percpu_alloc_cgrp_local_storage.skel.h" #include "percpu_alloc_fail.skel.h" @@ -115,6 +116,221 @@ static void test_failure(void) { RUN_TESTS(percpu_alloc_fail); } +static void test_percpu_map_op_cpu_flag(struct bpf_map *map, void *keys, size_t key_sz, + u32 max_entries, bool test_batch) +{ + int i, j, cpu, map_fd, value_size, nr_cpus, err; + u64 *values = NULL, batch = 0, flags; + const u64 value = 0xDEADC0DE; + size_t value_sz = sizeof(u64); + u32 count; + LIBBPF_OPTS(bpf_map_batch_opts, batch_opts); + + nr_cpus = libbpf_num_possible_cpus(); + if (!ASSERT_GT(nr_cpus, 0, "libbpf_num_possible_cpus")) + return; + + value_size = value_sz * nr_cpus; + values = calloc(max_entries, value_size); + if (!ASSERT_OK_PTR(values, "calloc values")) + goto out; + memset(values, 0, value_size * max_entries); + + map_fd = bpf_map__fd(map); + flags = (u64)nr_cpus << 32 | BPF_F_CPU; + err = bpf_map_update_elem(map_fd, keys, values, flags); + if (!ASSERT_EQ(err, -ERANGE, "bpf_map_update_elem_opts -ERANGE")) + goto out; + + err = bpf_map__update_elem(map, keys, key_sz, values, value_sz, flags); + if (!ASSERT_EQ(err, -ERANGE, "bpf_map__update_elem_opts -ERANGE")) + goto out; + + err = bpf_map_lookup_elem_flags(map_fd, keys, values, flags); + if (!ASSERT_EQ(err, -ERANGE, "bpf_map_lookup_elem_opts -ERANGE")) + goto out; + + err = bpf_map__lookup_elem(map, keys, key_sz, values, value_sz, flags); + if (!ASSERT_EQ(err, -ERANGE, "bpf_map__lookup_elem_opts -ERANGE")) + goto out; + + for (cpu = 0; cpu < nr_cpus; cpu++) { + /* clear value on all cpus */ + values[0] = 0; + flags = BPF_F_ALL_CPUS; + for (i = 0; i < max_entries; i++) { + err = bpf_map__update_elem(map, keys + i * key_sz, key_sz, values, + value_sz, flags); + if (!ASSERT_OK(err, "bpf_map__update_elem")) + goto out; + } + + /* update value on specified cpu */ + for (i = 0; i < max_entries; i++) { + values[0] = value; + flags = (u64)cpu << 32 | BPF_F_CPU; + err = bpf_map__update_elem(map, keys + i * key_sz, key_sz, values, + value_sz, flags); + if (!ASSERT_OK(err, "bpf_map__update_elem specified cpu")) + goto out; + + /* lookup then check value on CPUs */ + for (j = 0; j < nr_cpus; j++) { + flags = (u64)j << 32 | BPF_F_CPU; + err = bpf_map__lookup_elem(map, keys + i * key_sz, key_sz, values, + value_sz, flags); + if (!ASSERT_OK(err, "bpf_map__lookup_elem specified cpu")) + goto out; + if (!ASSERT_EQ(values[0], j != cpu ? 0 : value, + "bpf_map__lookup_elem value on specified cpu")) + goto out; + } + } + } + + if (!test_batch) + goto out; + + batch_opts.elem_flags = (u64)nr_cpus << 32 | BPF_F_CPU; + err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &batch_opts); + if (!ASSERT_EQ(err, -ERANGE, "bpf_map_update_batch -ERANGE")) + goto out; + + for (cpu = 0; cpu < nr_cpus; cpu++) { + memset(values, 0, max_entries * value_size); + + /* clear values across all CPUs */ + batch_opts.elem_flags = BPF_F_ALL_CPUS; + err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &batch_opts); + if (!ASSERT_OK(err, "bpf_map_update_batch all cpus")) + goto out; + + /* update values on specified CPU */ + for (i = 0; i < max_entries; i++) + values[i] = value; + + batch_opts.elem_flags = (u64)cpu << 32 | BPF_F_CPU; + err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &batch_opts); + if (!ASSERT_OK(err, "bpf_map_update_batch specified cpu")) + goto out; + + /* lookup values on specified CPU */ + memset(values, 0, max_entries * value_sz); + err = bpf_map_lookup_batch(map_fd, NULL, &batch, keys, values, &count, &batch_opts); + if (!ASSERT_TRUE(!err || err == -ENOENT, "bpf_map_lookup_batch specified cpu")) + goto out; + + for (i = 0; i < max_entries; i++) + if (!ASSERT_EQ(values[i], value, "value on specified cpu")) + goto out; + + /* lookup values from all CPUs */ + batch_opts.elem_flags = 0; + memset(values, 0, max_entries * value_size); + err = bpf_map_lookup_batch(map_fd, NULL, &batch, keys, values, &count, &batch_opts); + if (!ASSERT_TRUE(!err || err == -ENOENT, "bpf_map_lookup_batch all cpus")) + goto out; + + for (i = 0; i < max_entries; i++) { + for (j = 0; j < nr_cpus; j++) { + if (!ASSERT_EQ(values[i*nr_cpus + j], j != cpu ? 0 : value, + "value on specified cpu")) + goto out; + } + } + } + +out: + if (values) + free(values); +} + +static void test_percpu_map_cpu_flag(enum bpf_map_type map_type) +{ + struct percpu_alloc_array *skel; + size_t key_sz = sizeof(int); + int *keys = NULL, i, err; + struct bpf_map *map; + u32 max_entries; + + skel = percpu_alloc_array__open(); + if (!ASSERT_OK_PTR(skel, "percpu_alloc_array__open")) + return; + + map = skel->maps.percpu; + bpf_map__set_type(map, map_type); + + err = percpu_alloc_array__load(skel); + if (!ASSERT_OK(err, "test_percpu_alloc__load")) + goto out; + + max_entries = bpf_map__max_entries(map); + keys = calloc(max_entries, key_sz); + if (!ASSERT_OK_PTR(keys, "calloc keys")) + goto out; + + for (i = 0; i < max_entries; i++) + keys[i] = i; + + test_percpu_map_op_cpu_flag(map, keys, key_sz, max_entries, true); +out: + if (keys) + free(keys); + percpu_alloc_array__destroy(skel); +} + +static void test_percpu_array_cpu_flag(void) +{ + test_percpu_map_cpu_flag(BPF_MAP_TYPE_PERCPU_ARRAY); +} + +static void test_percpu_hash_cpu_flag(void) +{ + test_percpu_map_cpu_flag(BPF_MAP_TYPE_PERCPU_HASH); +} + +static void test_lru_percpu_hash_cpu_flag(void) +{ + test_percpu_map_cpu_flag(BPF_MAP_TYPE_LRU_PERCPU_HASH); +} + +static void test_percpu_cgroup_storage_cpu_flag(void) +{ + struct bpf_cgroup_storage_key key; + struct percpu_alloc_array *skel; + int cgroup = -1, prog_fd, err; + struct bpf_map *map; + + skel = percpu_alloc_array__open_and_load(); + if (!ASSERT_OK_PTR(skel, "percpu_alloc_array__open_and_load")) + return; + + cgroup = create_and_get_cgroup("/cg_percpu"); + if (!ASSERT_GE(cgroup, 0, "create_and_get_cgroup")) + goto out; + + err = join_cgroup("/cg_percpu"); + if (!ASSERT_OK(err, "join_cgroup")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.cgroup_egress); + err = bpf_prog_attach(prog_fd, cgroup, BPF_CGROUP_INET_EGRESS, 0); + if (!ASSERT_OK(err, "bpf_prog_attach")) + goto out; + + map = skel->maps.percpu_cgroup_storage; + err = bpf_map_get_next_key(bpf_map__fd(map), NULL, &key); + if (!ASSERT_OK(err, "bpf_map_get_next_key")) + goto out; + + test_percpu_map_op_cpu_flag(map, &key, sizeof(key), 1, false); +out: + bpf_prog_detach2(-1, cgroup, BPF_CGROUP_INET_EGRESS); + close(cgroup); + cleanup_cgroup_environment(); + percpu_alloc_array__destroy(skel); +} + void test_percpu_alloc(void) { if (test__start_subtest("array")) @@ -125,4 +341,12 @@ void test_percpu_alloc(void) test_cgrp_local_storage(); if (test__start_subtest("failure_tests")) test_failure(); + if (test__start_subtest("cpu_flag_percpu_array")) + test_percpu_array_cpu_flag(); + if (test__start_subtest("cpu_flag_percpu_hash")) + test_percpu_hash_cpu_flag(); + if (test__start_subtest("cpu_flag_lru_percpu_hash")) + test_lru_percpu_hash_cpu_flag(); + if (test__start_subtest("cpu_flag_percpu_cgroup_storage")) + test_percpu_cgroup_storage_cpu_flag(); } diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_array.c b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c index 37c2d2608ec0b..427301909c349 100644 --- a/tools/testing/selftests/bpf/progs/percpu_alloc_array.c +++ b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c @@ -187,4 +187,36 @@ int BPF_PROG(test_array_map_10) return 0; } +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 2); + __type(key, int); + __type(value, u64); +} percpu SEC(".maps"); + +SEC("?fentry/bpf_fentry_test1") +int BPF_PROG(test_percpu_array, int x) +{ + u64 value = 0xDEADC0DE; + int key = 0; + + bpf_map_update_elem(&percpu, &key, &value, BPF_ANY); + return 0; +} + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, u64); +} percpu_cgroup_storage SEC(".maps"); + +SEC("cgroup_skb/egress") +int cgroup_egress(struct __sk_buff *skb) +{ + u64 *val = bpf_get_local_storage(&percpu_cgroup_storage, 0); + + __sync_fetch_and_add(val, 1); + return 1; +} + char _license[] SEC("license") = "GPL"; -- 2.50.1