Unify the map flags checking of lookup, update, lookup_batch and
update_batch into one helper. It will then be convenient to check the
BPF_F_CPU flag in this helper for all of them in the next patch.

Signed-off-by: Leon Hwang
---
 kernel/bpf/syscall.c | 45 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 23 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0fbfa8532c392..19f7f5de5e7dc 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1654,6 +1654,17 @@ static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size)
 	return NULL;
 }
 
+static int check_map_flags(struct bpf_map *map, u64 flags, bool check_flag)
+{
+	if (check_flag && (flags & ~BPF_F_LOCK))
+		return -EINVAL;
+
+	if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))
+		return -EINVAL;
+
+	return 0;
+}
+
 /* last field in 'union bpf_attr' used by this command */
 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
 
@@ -1669,9 +1680,6 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
 		return -EINVAL;
 
-	if (attr->flags & ~BPF_F_LOCK)
-		return -EINVAL;
-
 	CLASS(fd, f)(attr->map_fd);
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
@@ -1679,9 +1687,9 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ))
 		return -EPERM;
 
-	if ((attr->flags & BPF_F_LOCK) &&
-	    !btf_record_has_field(map->record, BPF_SPIN_LOCK))
-		return -EINVAL;
+	err = check_map_flags(map, attr->flags, true);
+	if (err)
+		return err;
 
 	key = __bpf_copy_key(ukey, map->key_size);
 	if (IS_ERR(key))
@@ -1744,11 +1752,9 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
 		goto err_put;
 	}
 
-	if ((attr->flags & BPF_F_LOCK) &&
-	    !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
-		err = -EINVAL;
+	err = check_map_flags(map, attr->flags, false);
+	if (err)
 		goto err_put;
-	}
 
 	key = ___bpf_copy_key(ukey, map->key_size);
 	if (IS_ERR(key)) {
@@ -1952,13 +1958,9 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
 	void *key, *value;
 	int err = 0;
 
-	if (attr->batch.elem_flags & ~BPF_F_LOCK)
-		return -EINVAL;
-
-	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
-	    !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
-		return -EINVAL;
-	}
+	err = check_map_flags(map, attr->batch.elem_flags, true);
+	if (err)
+		return err;
 
 	value_size = bpf_map_value_size(map);
 
@@ -2015,12 +2017,9 @@ int generic_map_lookup_batch(struct bpf_map *map,
 	u32 value_size, cp, max_count;
 	int err;
 
-	if (attr->batch.elem_flags & ~BPF_F_LOCK)
-		return -EINVAL;
-
-	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
-	    !btf_record_has_field(map->record, BPF_SPIN_LOCK))
-		return -EINVAL;
+	err = check_map_flags(map, attr->batch.elem_flags, true);
+	if (err)
+		return err;
 
 	value_size = bpf_map_value_size(map);
--
2.50.1

Introduce support for the BPF_F_ALL_CPUS flag in percpu_array maps to
allow updating values for all CPUs with a single value.

Introduce support for the BPF_F_CPU flag in percpu_array maps to allow
updating the value for a specified CPU.

This enhancement enables:

* Efficiently updating values across all CPUs with a single value when
  BPF_F_ALL_CPUS is set for the update_elem and update_batch APIs.
* Targeted update or lookup for a specified CPU when BPF_F_CPU is set.

The BPF_F_CPU flag is passed via:

* map_flags of the lookup_elem and update_elem APIs, along with the
  embedded cpu field.
* elem_flags of the lookup_batch and update_batch APIs, along with the
  embedded cpu field.
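For illustration only (not part of the diff below): once the series
lands, user space could exercise the new flags roughly like this, with
the cpu number carried in the high 32 bits of the flags. 'map_fd' and
'key' are assumed to exist and error handling is elided:

	__u64 val = 0xDEADC0DE;
	/* cpu 3 is encoded in the upper 32 bits of the flags */
	__u64 flags = (__u64)3 << 32 | BPF_F_CPU;

	/* update, then read back, the value of CPU 3 only */
	bpf_map_update_elem(map_fd, &key, &val, flags);
	bpf_map_lookup_elem_flags(map_fd, &key, &val, flags);

	/* broadcast a single value to every possible CPU */
	bpf_map_update_elem(map_fd, &key, &val, BPF_F_ALL_CPUS);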
Signed-off-by: Leon Hwang
---
 include/linux/bpf.h            |  3 +-
 include/uapi/linux/bpf.h       |  2 ++
 kernel/bpf/arraymap.c          | 56 ++++++++++++++++++++++++++--------
 kernel/bpf/syscall.c           | 27 ++++++++++------
 tools/include/uapi/linux/bpf.h |  2 ++
 5 files changed, 67 insertions(+), 23 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8f6e87f0f3a89..b2191b1e455a6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2697,7 +2697,8 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
 				   struct bpf_func_state *callee);
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
-int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value,
+			  u64 flags);
 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
 			   u64 flags);
 int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 233de8677382e..be1fdc5042744 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1372,6 +1372,8 @@ enum {
 	BPF_NOEXIST	= 1, /* create new element if it didn't exist */
 	BPF_EXIST	= 2, /* update existing element */
 	BPF_F_LOCK	= 4, /* spin_lock-ed map_lookup/map_update */
+	BPF_F_CPU	= 8, /* cpu flag for percpu maps, upper 32 bits of flags carry the cpu number */
+	BPF_F_ALL_CPUS	= 16, /* update value across all CPUs for percpu maps */
 };
 
 /* flags for BPF_MAP_CREATE command */

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 3d080916faf97..1efa730105e24 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -295,17 +295,24 @@ static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key,
 	return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu);
 }
 
-int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
+int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	u32 index = *(u32 *)key;
 	void __percpu *pptr;
-	int cpu, off = 0;
-	u32 size;
+	u32 size, cpu;
+	int off = 0;
 
 	if (unlikely(index >= array->map.max_entries))
 		return -ENOENT;
 
+	if (unlikely((u32)flags & ~BPF_F_CPU))
+		return -EINVAL;
+
+	cpu = flags >> 32;
+	if (unlikely((flags & BPF_F_CPU) && cpu >= num_possible_cpus()))
+		return -ERANGE;
+
 	/* per_cpu areas are zero-filled and bpf programs can only
 	 * access 'value_size' of them, so copying rounded areas
 	 * will not leak any kernel data
@@ -313,10 +320,15 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
 	size = array->elem_size;
 	rcu_read_lock();
 	pptr = array->pptrs[index & array->index_mask];
-	for_each_possible_cpu(cpu) {
-		copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
-		check_and_init_map_value(map, value + off);
-		off += size;
+	if (flags & BPF_F_CPU) {
+		copy_map_value_long(map, value, per_cpu_ptr(pptr, cpu));
+		check_and_init_map_value(map, value);
+	} else {
+		for_each_possible_cpu(cpu) {
+			copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
+			check_and_init_map_value(map, value + off);
+			off += size;
+		}
 	}
 	rcu_read_unlock();
 	return 0;
@@ -385,14 +397,22 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 			    u64 map_flags)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	const u64 cpu_flags = BPF_F_CPU | BPF_F_ALL_CPUS;
 	u32 index = *(u32 *)key;
 	void __percpu *pptr;
-	int cpu, off = 0;
-	u32 size;
+	u32 size, cpu;
+	int off = 0;
 
-	if (unlikely(map_flags > BPF_EXIST))
+	if (unlikely((u32)map_flags > BPF_F_ALL_CPUS))
 		/* unknown flags */
 		return -EINVAL;
+	if (unlikely((map_flags & cpu_flags) == cpu_flags))
+		return -EINVAL;
+
+	cpu = map_flags >> 32;
+	if (unlikely((map_flags & BPF_F_CPU) && cpu >= num_possible_cpus()))
+		/* invalid cpu */
+		return -ERANGE;
 
 	if (unlikely(index >= array->map.max_entries))
 		/* all elements were pre-allocated, cannot insert a new one */
@@ -411,10 +431,20 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 	size = array->elem_size;
 	rcu_read_lock();
 	pptr = array->pptrs[index & array->index_mask];
-	for_each_possible_cpu(cpu) {
-		copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
+	if (map_flags & BPF_F_CPU) {
+		copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value);
 		bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
-		off += size;
+	} else {
+		for_each_possible_cpu(cpu) {
+			copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
+			/* same user-provided value is used if
+			 * BPF_F_ALL_CPUS is specified, otherwise value is
+			 * an array of per-cpu values.
+			 */
+			if (!(map_flags & BPF_F_ALL_CPUS))
+				off += size;
+			bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
+		}
 	}
 	rcu_read_unlock();
 	return 0;

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 19f7f5de5e7dc..6251ac9bc7e42 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -131,9 +131,11 @@ bool bpf_map_write_active(const struct bpf_map *map)
 	return atomic64_read(&map->writecnt) != 0;
 }
 
-static u32 bpf_map_value_size(const struct bpf_map *map)
+static u32 bpf_map_value_size(const struct bpf_map *map, u64 flags)
 {
-	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY && (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)))
+		return round_up(map->value_size, 8);
+	else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
 	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
@@ -314,7 +316,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
 		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 		err = bpf_percpu_hash_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
-		err = bpf_percpu_array_copy(map, key, value);
+		err = bpf_percpu_array_copy(map, key, value, flags);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
 		err = bpf_percpu_cgroup_storage_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
@@ -1656,12 +1658,19 @@ static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size)
 
 static int check_map_flags(struct bpf_map *map, u64 flags, bool check_flag)
 {
-	if (check_flag && (flags & ~BPF_F_LOCK))
+	if (check_flag && ((u32)flags & ~(BPF_F_LOCK | BPF_F_CPU | BPF_F_ALL_CPUS)))
 		return -EINVAL;
 
 	if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))
 		return -EINVAL;
 
+	if (!(flags & BPF_F_CPU) && flags >> 32)
+		return -EINVAL;
+
+	if ((flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) &&
+	    map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY)
+		return -EINVAL;
+
 	return 0;
 }
 
@@ -1695,7 +1704,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (IS_ERR(key))
 		return PTR_ERR(key);
 
-	value_size = bpf_map_value_size(map);
+	value_size = bpf_map_value_size(map, attr->flags);
 
 	err = -ENOMEM;
 	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
@@ -1762,7 +1771,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
 		goto err_put;
 	}
 
-	value_size = bpf_map_value_size(map);
+	value_size = bpf_map_value_size(map, attr->flags);
 
 	value = kvmemdup_bpfptr(uvalue, value_size);
 	if (IS_ERR(value)) {
 		err = PTR_ERR(value);
@@ -1962,7 +1971,7 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
 	if (err)
 		return err;
 
-	value_size = bpf_map_value_size(map);
+	value_size = bpf_map_value_size(map, attr->batch.elem_flags);
 
 	max_count = attr->batch.count;
 	if (!max_count)
@@ -2021,7 +2030,7 @@ int generic_map_lookup_batch(struct bpf_map *map,
 	if (err)
 		return err;
 
-	value_size = bpf_map_value_size(map);
+	value_size = bpf_map_value_size(map, attr->batch.elem_flags);
 
 	max_count = attr->batch.count;
 	if (!max_count)
@@ -2143,7 +2152,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
-	value_size = bpf_map_value_size(map);
+	value_size = bpf_map_value_size(map, 0);
 
 	err = -ENOMEM;
 	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 233de8677382e..be1fdc5042744 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1372,6 +1372,8 @@ enum {
 	BPF_NOEXIST	= 1, /* create new element if it didn't exist */
 	BPF_EXIST	= 2, /* update existing element */
 	BPF_F_LOCK	= 4, /* spin_lock-ed map_lookup/map_update */
+	BPF_F_CPU	= 8, /* cpu flag for percpu maps, upper 32 bits of flags carry the cpu number */
+	BPF_F_ALL_CPUS	= 16, /* update value across all CPUs for percpu maps */
 };
 
 /* flags for BPF_MAP_CREATE command */
--
2.50.1

Introduce BPF_F_ALL_CPUS flag support for percpu_hash and lru_percpu_hash
maps to allow updating values for all CPUs with a single value.

Introduce BPF_F_CPU flag support for percpu_hash and lru_percpu_hash maps
to allow updating the value for a specified CPU.

This enhancement enables:

* Efficiently updating values across all CPUs with a single value when
  BPF_F_ALL_CPUS is set for the update_elem and update_batch APIs.
* Targeted update or lookup for a specified CPU when BPF_F_CPU is set.

The BPF_F_CPU flag is passed via:

* map_flags of the lookup_elem and update_elem APIs, along with the
  embedded cpu field.
* elem_flags of the lookup_batch and update_batch APIs, along with the
  embedded cpu field.
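As an aside (illustrative only, not part of the diff below): the batch
APIs take the same encoding via opts->elem_flags. 'map_fd', 'keys',
'vals' and 'count' are assumed to exist; error handling is elided:

	LIBBPF_OPTS(bpf_map_batch_opts, opts, .elem_flags = BPF_F_ALL_CPUS);
	__u64 out_batch = 0;

	/* write one value per key to all possible CPUs at once */
	bpf_map_update_batch(map_fd, keys, vals, &count, &opts);

	/* read each key's value back from CPU 1 only */
	opts.elem_flags = (__u64)1 << 32 | BPF_F_CPU;
	bpf_map_lookup_batch(map_fd, NULL, &out_batch, keys, vals, &count, &opts);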
Signed-off-by: Leon Hwang
---
 include/linux/bpf.h   |  54 +++++++++++++++++++++-
 kernel/bpf/arraymap.c |  29 ++++-------
 kernel/bpf/hashtab.c  | 111 +++++++++++++++++++++++++++++-------------
 kernel/bpf/syscall.c  |  30 +++---------
 4 files changed, 147 insertions(+), 77 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b2191b1e455a6..dc715eef9cbf4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2696,7 +2696,7 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
 				   struct bpf_func_state *caller,
 				   struct bpf_func_state *callee);
 
-int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 flags);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value,
 			  u64 flags);
 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
@@ -3710,4 +3710,56 @@ int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char *
 			   const char **linep, int *nump);
 struct bpf_prog *bpf_prog_find_from_stack(void);
 
+static inline int bpf_map_check_cpu_flags(u64 flags, bool check_all_cpus)
+{
+	const u64 cpu_flags = BPF_F_CPU | BPF_F_ALL_CPUS;
+	u32 cpu;
+
+	if (check_all_cpus) {
+		if (unlikely((u32)flags > BPF_F_ALL_CPUS))
+			/* unknown flags */
+			return -EINVAL;
+		if (unlikely((flags & cpu_flags) == cpu_flags))
+			return -EINVAL;
+	} else {
+		if (unlikely((u32)flags & ~BPF_F_CPU))
+			return -EINVAL;
+	}
+
+	cpu = flags >> 32;
+	if (unlikely((flags & BPF_F_CPU) && cpu >= num_possible_cpus()))
+		return -ERANGE;
+
+	return 0;
+}
+
+static inline bool bpf_map_support_cpu_flags(enum bpf_map_type map_type)
+{
+	switch (map_type) {
+	case BPF_MAP_TYPE_PERCPU_ARRAY:
+	case BPF_MAP_TYPE_PERCPU_HASH:
+	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static inline int bpf_map_check_flags(struct bpf_map *map, u64 flags, bool check_flag)
+{
+	if (check_flag && ((u32)flags & ~(BPF_F_LOCK | BPF_F_CPU | BPF_F_ALL_CPUS)))
+		return -EINVAL;
+
+	if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))
+		return -EINVAL;
+
+	if (!(flags & BPF_F_CPU) && flags >> 32)
+		return -EINVAL;
+
+	if ((flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) && !bpf_map_support_cpu_flags(map->map_type))
+		return -EINVAL;
+
+	return 0;
+}
+
 #endif /* _LINUX_BPF_H */

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 1efa730105e24..f7646bcabb3c8 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -300,18 +300,15 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	u32 index = *(u32 *)key;
 	void __percpu *pptr;
+	int off = 0, err;
 	u32 size, cpu;
-	int off = 0;
 
 	if (unlikely(index >= array->map.max_entries))
 		return -ENOENT;
 
-	if (unlikely((u32)flags & ~BPF_F_CPU))
-		return -EINVAL;
-
-	cpu = flags >> 32;
-	if (unlikely((flags & BPF_F_CPU) && cpu >= num_possible_cpus()))
-		return -ERANGE;
+	err = bpf_map_check_cpu_flags(flags, false);
+	if (unlikely(err))
+		return err;
 
 	/* per_cpu areas are zero-filled and bpf programs can only
 	 * access 'value_size' of them, so copying rounded areas
@@ -321,6 +318,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags
 	rcu_read_lock();
 	pptr = array->pptrs[index & array->index_mask];
 	if (flags & BPF_F_CPU) {
+		cpu = flags >> 32;
 		copy_map_value_long(map, value, per_cpu_ptr(pptr, cpu));
 		check_and_init_map_value(map, value);
 	} else {
@@ -397,22 +395,14 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 			    u64 map_flags)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
-	const u64 cpu_flags = BPF_F_CPU | BPF_F_ALL_CPUS;
 	u32 index = *(u32 *)key;
 	void __percpu *pptr;
+	int off = 0, err;
 	u32 size, cpu;
-	int off = 0;
-
-	if (unlikely((u32)map_flags > BPF_F_ALL_CPUS))
-		/* unknown flags */
-		return -EINVAL;
-	if (unlikely((map_flags & cpu_flags) == cpu_flags))
-		return -EINVAL;
 
-	cpu = map_flags >> 32;
-	if (unlikely((map_flags & BPF_F_CPU) && cpu >= num_possible_cpus()))
-		/* invalid cpu */
-		return -ERANGE;
+	err = bpf_map_check_cpu_flags(map_flags, true);
+	if (unlikely(err))
+		return err;
 
 	if (unlikely(index >= array->map.max_entries))
 		/* all elements were pre-allocated, cannot insert a new one */
@@ -432,6 +422,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 	rcu_read_lock();
 	pptr = array->pptrs[index & array->index_mask];
 	if (map_flags & BPF_F_CPU) {
+		cpu = map_flags >> 32;
 		copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value);
 		bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
 	} else {

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 71f9931ac64cd..34a35cdade425 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -937,24 +937,39 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 }
 
 static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
-			    void *value, bool onallcpus)
+			    void *value, bool onallcpus, u64 map_flags)
 {
+	int cpu = map_flags & BPF_F_CPU ? map_flags >> 32 : 0;
+	int current_cpu = raw_smp_processor_id();
+
 	if (!onallcpus) {
 		/* copy true value_size bytes */
-		copy_map_value(&htab->map, this_cpu_ptr(pptr), value);
+		copy_map_value(&htab->map, (map_flags & BPF_F_CPU) && cpu != current_cpu ?
+			       per_cpu_ptr(pptr, cpu) : this_cpu_ptr(pptr), value);
 	} else {
 		u32 size = round_up(htab->map.value_size, 8);
-		int off = 0, cpu;
+		int off = 0;
+
+		if (map_flags & BPF_F_CPU) {
+			copy_map_value_long(&htab->map, cpu != current_cpu ?
+					    per_cpu_ptr(pptr, cpu) : this_cpu_ptr(pptr), value);
+			return;
+		}
 
 		for_each_possible_cpu(cpu) {
 			copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off);
-			off += size;
+			/* same user-provided value is used if
+			 * BPF_F_ALL_CPUS is specified, otherwise value is
+			 * an array of per-cpu values.
+			 */
+			if (!(map_flags & BPF_F_ALL_CPUS))
+				off += size;
 		}
 	}
 }
 
 static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
-			    void *value, bool onallcpus)
+			    void *value, bool onallcpus, u64 map_flags)
 {
 	/* When not setting the initial value on all cpus, zero-fill element
 	 * values for other cpus. Otherwise, bpf program has no way to ensure
@@ -972,7 +987,7 @@ static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
 			zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu));
 		}
 	} else {
-		pcpu_copy_value(htab, pptr, value, onallcpus);
+		pcpu_copy_value(htab, pptr, value, onallcpus, map_flags);
 	}
 }
 
@@ -984,7 +999,7 @@ static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
 					 bool percpu, bool onallcpus,
-					 struct htab_elem *old_elem)
+					 struct htab_elem *old_elem, u64 map_flags)
 {
 	u32 size = htab->map.value_size;
 	bool prealloc = htab_is_prealloc(htab);
@@ -1042,7 +1057,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 		pptr = *(void __percpu **)ptr;
 	}
 
-	pcpu_init_value(htab, pptr, value, onallcpus);
+	pcpu_init_value(htab, pptr, value, onallcpus, map_flags);
 
 	if (!prealloc)
 		htab_elem_set_ptr(l_new, key_size, pptr);
@@ -1147,7 +1162,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	}
 
 	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
-				l_old);
+				l_old, map_flags);
 	if (IS_ERR(l_new)) {
 		/* all pre-allocated elements are in use or memory exhausted */
 		ret = PTR_ERR(l_new);
@@ -1263,9 +1278,15 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
 	u32 key_size, hash;
 	int ret;
 
-	if (unlikely(map_flags > BPF_EXIST))
-		/* unknown flags */
-		return -EINVAL;
+	if (percpu) {
+		ret = bpf_map_check_cpu_flags(map_flags, true);
+		if (unlikely(ret))
+			return ret;
+	} else {
+		if (unlikely(map_flags > BPF_EXIST))
+			/* unknown flags */
+			return -EINVAL;
+	}
 
 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
 		     !rcu_read_lock_bh_held());
@@ -1291,7 +1312,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
 		/* Update value in-place */
 		if (percpu) {
 			pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
-					value, onallcpus);
+					value, onallcpus, map_flags);
 		} else {
 			void **inner_map_pptr = htab_elem_value(l_old, key_size);
 
@@ -1300,7 +1321,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
 		}
 	} else {
 		l_new = alloc_htab_elem(htab, key, value, key_size,
-					hash, percpu, onallcpus, NULL);
+					hash, percpu, onallcpus, NULL, map_flags);
 		if (IS_ERR(l_new)) {
 			ret = PTR_ERR(l_new);
 			goto err;
@@ -1326,9 +1347,9 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 	u32 key_size, hash;
 	int ret;
 
-	if (unlikely(map_flags > BPF_EXIST))
-		/* unknown flags */
-		return -EINVAL;
+	ret = bpf_map_check_cpu_flags(map_flags, true);
+	if (unlikely(ret))
+		return ret;
 
 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
 		     !rcu_read_lock_bh_held());
@@ -1366,10 +1387,10 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 
 		/* per-cpu hash map can update value in-place */
 		pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
-				value, onallcpus);
+				value, onallcpus, map_flags);
 	} else {
 		pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
-				value, onallcpus);
+				value, onallcpus, map_flags);
 		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 		l_new = NULL;
 	}
@@ -1698,9 +1719,16 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 	int ret = 0;
 
 	elem_map_flags = attr->batch.elem_flags;
-	if ((elem_map_flags & ~BPF_F_LOCK) ||
-	    ((elem_map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
-		return -EINVAL;
+	if (!do_delete && is_percpu) {
+		ret = bpf_map_check_flags(map, elem_map_flags, false);
+		if (ret)
+			return ret;
+	} else {
+		if ((elem_map_flags & ~BPF_F_LOCK) ||
+		    ((elem_map_flags & BPF_F_LOCK) &&
+		     !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
+			return -EINVAL;
+	}
 
 	map_flags = attr->batch.flags;
 	if (map_flags)
@@ -1806,10 +1834,17 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 			void __percpu *pptr;
 
 			pptr = htab_elem_get_ptr(l, map->key_size);
-			for_each_possible_cpu(cpu) {
-				copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu));
-				check_and_init_map_value(&htab->map, dst_val + off);
-				off += size;
+			if (!do_delete && (elem_map_flags & BPF_F_CPU)) {
+				cpu = elem_map_flags >> 32;
+				copy_map_value_long(&htab->map, dst_val, per_cpu_ptr(pptr, cpu));
+				check_and_init_map_value(&htab->map, dst_val);
+			} else {
+				for_each_possible_cpu(cpu) {
+					copy_map_value_long(&htab->map, dst_val + off,
+							    per_cpu_ptr(pptr, cpu));
+					check_and_init_map_value(&htab->map, dst_val + off);
+					off += size;
+				}
 			}
 		} else {
 			value = htab_elem_value(l, key_size);
@@ -2365,14 +2400,18 @@ static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *k
 	return NULL;
 }
 
-int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
+int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 map_flags)
 {
+	int ret, cpu, off = 0;
 	struct htab_elem *l;
 	void __percpu *pptr;
-	int ret = -ENOENT;
-	int cpu, off = 0;
 	u32 size;
 
+	ret = bpf_map_check_cpu_flags(map_flags, false);
+	if (unlikely(ret))
+		return ret;
+	ret = -ENOENT;
+
 	/* per_cpu areas are zero-filled and bpf programs can only
 	 * access 'value_size' of them, so copying rounded areas
 	 * will not leak any kernel data
@@ -2386,10 +2425,16 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
 	 * eviction heuristics when user space does a map walk.
 	 */
 	pptr = htab_elem_get_ptr(l, map->key_size);
-	for_each_possible_cpu(cpu) {
-		copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
-		check_and_init_map_value(map, value + off);
-		off += size;
+	if (map_flags & BPF_F_CPU) {
+		cpu = map_flags >> 32;
+		copy_map_value_long(map, value, per_cpu_ptr(pptr, cpu));
+		check_and_init_map_value(map, value);
+	} else {
+		for_each_possible_cpu(cpu) {
+			copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
+			check_and_init_map_value(map, value + off);
+			off += size;
+		}
 	}
 	ret = 0;
 out:

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6251ac9bc7e42..430f013f38f06 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -133,7 +133,7 @@ bool bpf_map_write_active(const struct bpf_map *map)
 
 static u32 bpf_map_value_size(const struct bpf_map *map, u64 flags)
 {
-	if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY && (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)))
+	if (bpf_map_support_cpu_flags(map->map_type) && (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)))
 		return round_up(map->value_size, 8);
 	else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 		 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
@@ -314,7 +314,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
 	bpf_disable_instrumentation();
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
-		err = bpf_percpu_hash_copy(map, key, value);
+		err = bpf_percpu_hash_copy(map, key, value, flags);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 		err = bpf_percpu_array_copy(map, key, value, flags);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
@@ -1656,24 +1656,6 @@ static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size)
 	return NULL;
 }
 
-static int check_map_flags(struct bpf_map *map, u64 flags, bool check_flag)
-{
-	if (check_flag && ((u32)flags & ~(BPF_F_LOCK | BPF_F_CPU | BPF_F_ALL_CPUS)))
-		return -EINVAL;
-
-	if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))
-		return -EINVAL;
-
-	if (!(flags & BPF_F_CPU) && flags >> 32)
-		return -EINVAL;
-
-	if ((flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) &&
-	    map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY)
-		return -EINVAL;
-
-	return 0;
-}
-
 /* last field in 'union bpf_attr' used by this command */
 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
 
@@ -1696,7 +1678,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ))
 		return -EPERM;
 
-	err = check_map_flags(map, attr->flags, true);
+	err = bpf_map_check_flags(map, attr->flags, true);
 	if (err)
 		return err;
 
@@ -1761,7 +1743,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
 		goto err_put;
 	}
 
-	err = check_map_flags(map, attr->flags, false);
+	err = bpf_map_check_flags(map, attr->flags, false);
 	if (err)
 		goto err_put;
 
@@ -1967,7 +1949,7 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
 	void *key, *value;
 	int err = 0;
 
-	err = check_map_flags(map, attr->batch.elem_flags, true);
+	err = bpf_map_check_flags(map, attr->batch.elem_flags, true);
 	if (err)
 		return err;
 
@@ -2026,7 +2008,7 @@ int generic_map_lookup_batch(struct bpf_map *map,
 	u32 value_size, cp, max_count;
 	int err;
 
-	err = check_map_flags(map, attr->batch.elem_flags, true);
+	err = bpf_map_check_flags(map, attr->batch.elem_flags, true);
 	if (err)
 		return err;
--
2.50.1

Introduce BPF_F_ALL_CPUS flag support for percpu_cgroup_storage maps to
allow updating values for all CPUs with a single value.
Introduce BPF_F_CPU flag support for percpu_cgroup_storage maps to allow
updating the value for a specified CPU.

This enhancement enables:

* Efficiently updating values across all CPUs with a single value when
  BPF_F_ALL_CPUS is set for the update_elem API.
* Targeted update or lookup for a specified CPU when BPF_F_CPU is set.

The BPF_F_CPU flag is passed via map_flags of the lookup_elem and
update_elem APIs, along with the embedded cpu field.

Signed-off-by: Leon Hwang
---
 include/linux/bpf-cgroup.h |  5 ++--
 include/linux/bpf.h        |  5 ++--
 kernel/bpf/local_storage.c | 47 +++++++++++++++++++++++++++++---------
 kernel/bpf/syscall.c       | 12 ++++------
 4 files changed, 46 insertions(+), 23 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index aedf573bdb426..1cb28660aa866 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -172,7 +172,8 @@ void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
 void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
 int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map);
 
-int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value,
+				   u64 flags);
 int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 				     void *value, u64 flags);
 
@@ -467,7 +468,7 @@ static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
 static inline void bpf_cgroup_storage_free(
 	struct bpf_cgroup_storage *storage) {}
 static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key,
-						 void *value) {
+						 void *value, u64 flags) {
 	return 0;
 }
 static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index dc715eef9cbf4..2684ba32bba0a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -3733,12 +3733,13 @@ static inline int bpf_map_check_cpu_flags(u64 flags, bool check_all_cpus)
 	return 0;
 }
 
-static inline bool bpf_map_support_cpu_flags(enum bpf_map_type map_type)
+static inline bool bpf_map_is_percpu(enum bpf_map_type map_type)
 {
 	switch (map_type) {
 	case BPF_MAP_TYPE_PERCPU_ARRAY:
 	case BPF_MAP_TYPE_PERCPU_HASH:
 	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
 		return true;
 	default:
 		return false;
@@ -3756,7 +3757,7 @@ static inline int bpf_map_check_flags(struct bpf_map *map, u64 flags, bool check
 	if (!(flags & BPF_F_CPU) && flags >> 32)
 		return -EINVAL;
 
-	if ((flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) && !bpf_map_support_cpu_flags(map->map_type))
+	if ((flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) && !bpf_map_is_percpu(map->map_type))
		return -EINVAL;
 
 	return 0;

diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index c93a756e035c0..ee60b8cee4e90 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -180,18 +180,22 @@ static long cgroup_storage_update_elem(struct bpf_map *map, void *key,
 }
 
 int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *key,
-				   void *value)
+				   void *value, u64 map_flags)
 {
 	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
 	struct bpf_cgroup_storage *storage;
-	int cpu, off = 0;
+	int cpu, off = 0, err;
 	u32 size;
 
+	err = bpf_map_check_cpu_flags(map_flags, false);
+	if (err)
+		return err;
+
 	rcu_read_lock();
 	storage = cgroup_storage_lookup(map, key, false);
 	if (!storage) {
-		rcu_read_unlock();
-		return -ENOENT;
+		err = -ENOENT;
+		goto unlock;
 	}
 
 	/* per_cpu areas are zero-filled and bpf programs can only
@@ -199,13 +203,19 @@ int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *key,
 	 * will not leak any kernel data
 	 */
 	size = round_up(_map->value_size, 8);
+	if (map_flags & BPF_F_CPU) {
+		cpu = map_flags >> 32;
+		bpf_long_memcpy(value, per_cpu_ptr(storage->percpu_buf, cpu), size);
+		goto unlock;
+	}
 	for_each_possible_cpu(cpu) {
 		bpf_long_memcpy(value + off,
 				per_cpu_ptr(storage->percpu_buf, cpu), size);
 		off += size;
 	}
+unlock:
 	rcu_read_unlock();
-	return 0;
+	return err;
 }
 
 int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *key,
@@ -213,17 +223,21 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *key,
 {
 	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
 	struct bpf_cgroup_storage *storage;
-	int cpu, off = 0;
+	int cpu, off = 0, err;
 	u32 size;
 
-	if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
+	if ((u32)map_flags & ~(BPF_ANY | BPF_EXIST | BPF_F_CPU | BPF_F_ALL_CPUS))
 		return -EINVAL;
 
+	err = bpf_map_check_cpu_flags(map_flags, true);
+	if (err)
+		return err;
+
 	rcu_read_lock();
 	storage = cgroup_storage_lookup(map, key, false);
 	if (!storage) {
-		rcu_read_unlock();
-		return -ENOENT;
+		err = -ENOENT;
+		goto unlock;
 	}
 
 	/* the user space will provide round_up(value_size, 8) bytes that
@@ -233,13 +247,24 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *key,
 	 * so no kernel data leaks possible
 	 */
 	size = round_up(_map->value_size, 8);
+	if (map_flags & BPF_F_CPU) {
+		cpu = map_flags >> 32;
+		bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu), value, size);
+		goto unlock;
+	}
 	for_each_possible_cpu(cpu) {
 		bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
 				value + off, size);
-		off += size;
+		/* same user-provided value is used if
+		 * BPF_F_ALL_CPUS is specified, otherwise value is
+		 * an array of per-cpu values.
+		 */
+		if (!(map_flags & BPF_F_ALL_CPUS))
+			off += size;
 	}
+unlock:
 	rcu_read_unlock();
-	return 0;
+	return err;
 }
 
 static int cgroup_storage_get_next_key(struct bpf_map *_map, void *key,

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 430f013f38f06..3fc52cd0c12de 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -133,13 +133,9 @@ bool bpf_map_write_active(const struct bpf_map *map)
 
 static u32 bpf_map_value_size(const struct bpf_map *map, u64 flags)
 {
-	if (bpf_map_support_cpu_flags(map->map_type) && (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)))
-		return round_up(map->value_size, 8);
-	else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
-		 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
-		 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
-		 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
-		return round_up(map->value_size, 8) * num_possible_cpus();
+	if (bpf_map_is_percpu(map->map_type))
+		return flags & (BPF_F_CPU | BPF_F_ALL_CPUS) ? round_up(map->value_size, 8) :
+		       round_up(map->value_size, 8) * num_possible_cpus();
 	else if (IS_FD_MAP(map))
 		return sizeof(u32);
 	else
@@ -318,7 +314,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 		err = bpf_percpu_array_copy(map, key, value, flags);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
-		err = bpf_percpu_cgroup_storage_copy(map, key, value);
+		err = bpf_percpu_cgroup_storage_copy(map, key, value, flags);
 	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
 		err = bpf_stackmap_copy(map, key, value);
 	} else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
--
2.50.1

Add libbpf support for the BPF_F_CPU flag for percpu maps by embedding
the cpu info into the high 32 bits of:

1. **flags**: bpf_map_lookup_elem_flags(), bpf_map__lookup_elem(),
   bpf_map_update_elem() and bpf_map__update_elem()
2. **opts->elem_flags**: bpf_map_lookup_batch() and bpf_map_update_batch()

The flag can also be BPF_F_ALL_CPUS, but BPF_F_CPU and BPF_F_ALL_CPUS
must not be combined ('BPF_F_CPU | BPF_F_ALL_CPUS' is rejected).

Behavior:

* If the flag is BPF_F_ALL_CPUS, the update is applied across all CPUs.
* If the flag is BPF_F_CPU, the update is applied only to the specified CPU.
* If the flag is BPF_F_CPU, the lookup reads the value only from the
  specified CPU.
* Lookup does not support BPF_F_ALL_CPUS.

Signed-off-by: Leon Hwang
---
 tools/lib/bpf/bpf.h    |  8 ++++++++
 tools/lib/bpf/libbpf.c | 25 +++++++++++++++++++------
 tools/lib/bpf/libbpf.h | 21 ++++++++-------------
 3 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 7252150e7ad35..28acb15e982b3 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -286,6 +286,14 @@ LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch,
 *	Update spin_lock-ed map elements. This must be
 *	specified if the map value contains a spinlock.
 *
+ * **BPF_F_CPU**
+ *	For percpu maps, update the value on the specified CPU, where the
+ *	cpu number is embedded in the high 32 bits of **opts->elem_flags**.
+ *
+ * **BPF_F_ALL_CPUS**
+ *	For percpu maps, update the value across all CPUs. This flag cannot
+ *	be used together with BPF_F_CPU.
+ *
 * @param fd BPF map file descriptor
 * @param keys pointer to an array of *count* keys
 * @param values pointer to an array of *count* values

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index fe4fc5438678c..c949281984880 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -10603,7 +10603,7 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
 }
 
 static int validate_map_op(const struct bpf_map *map, size_t key_sz,
-			   size_t value_sz, bool check_value_sz)
+			   size_t value_sz, bool check_value_sz, __u64 flags)
 {
 	if (!map_is_created(map)) /* map is not yet created */
 		return -ENOENT;
@@ -10630,6 +10630,19 @@ static int validate_map_op(const struct bpf_map *map, size_t key_sz,
 		int num_cpu = libbpf_num_possible_cpus();
 		size_t elem_sz = roundup(map->def.value_size, 8);
 
+		if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) {
+			if ((flags & BPF_F_CPU) && (flags & BPF_F_ALL_CPUS))
+				return -EINVAL;
+			if ((flags >> 32) >= num_cpu)
+				return -ERANGE;
+			if (value_sz != elem_sz) {
+				pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %zu\n",
+					map->name, value_sz, elem_sz);
+				return -EINVAL;
+			}
+			break;
+		}
+
 		if (value_sz != num_cpu * elem_sz) {
 			pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
 				map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
@@ -10654,7 +10667,7 @@ int bpf_map__lookup_elem(const struct bpf_map *map,
 {
 	int err;
 
-	err = validate_map_op(map, key_sz, value_sz, true);
+	err = validate_map_op(map, key_sz, value_sz, true, flags);
 	if (err)
 		return libbpf_err(err);
 
@@ -10667,7 +10680,7 @@ int bpf_map__update_elem(const struct bpf_map *map,
 {
 	int err;
 
-	err = validate_map_op(map, key_sz, value_sz, true);
+	err = validate_map_op(map, key_sz, value_sz, true, flags);
 	if (err)
 		return libbpf_err(err);
 
@@ -10679,7 +10692,7 @@ int bpf_map__delete_elem(const struct bpf_map *map,
 {
 	int err;
 
-	err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
+	err = validate_map_op(map, key_sz, 0, false /* check_value_sz */, 0);
 	if (err)
 		return libbpf_err(err);
 
@@ -10692,7 +10705,7 @@ int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
 {
 	int err;
 
-	err = validate_map_op(map, key_sz, value_sz, true);
+	err = validate_map_op(map, key_sz, value_sz, true, 0);
 	if (err)
 		return libbpf_err(err);
 
@@ -10704,7 +10717,7 @@ int bpf_map__get_next_key(const struct bpf_map *map,
 {
 	int err;
 
-	err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
+	err = validate_map_op(map, key_sz, 0, false /* check_value_sz */, 0);
 	if (err)
 		return libbpf_err(err);

diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 2e91148d9b44d..6a972a8d060c3 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1196,12 +1196,13 @@ LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map);
 * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
 * @param value pointer to memory in which looked up value will be stored
 * @param value_sz size in byte of value data memory; it has to match BPF map
- * definition's **value_size**. For per-CPU BPF maps value size has to be
- * a product of BPF map value size and number of possible CPUs in the system
- * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for
- * per-CPU values value size has to be aligned up to closest 8 bytes for
- * alignment reasons, so expected size is: `round_up(value_size, 8)
- * * libbpf_num_possible_cpus()`.
+ * definition's **value_size**. For per-CPU BPF maps, the value size can be
+ * definition's **value_size** if **BPF_F_CPU** or **BPF_F_ALL_CPUS** is
+ * specified in **flags**, otherwise a product of the BPF map value size and
+ * the number of possible CPUs in the system (which can be fetched with
+ * **libbpf_num_possible_cpus()**). Note also that for per-CPU values the
+ * value size has to be aligned up to the closest 8 bytes, so the expected
+ * size is: `round_up(value_size, 8) * libbpf_num_possible_cpus()`.
 * @flags extra flags passed to kernel for this operation
 * @return 0, on success; negative error, otherwise
 *
@@ -1219,13 +1220,7 @@ LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map,
 * @param key pointer to memory containing bytes of the key
 * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
 * @param value pointer to memory containing bytes of the value
- * @param value_sz size in byte of value data memory; it has to match BPF map
- * definition's **value_size**. For per-CPU BPF maps value size has to be
- * a product of BPF map value size and number of possible CPUs in the system
- * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for
- * per-CPU values value size has to be aligned up to closest 8 bytes for
- * alignment reasons, so expected size is: `round_up(value_size, 8)
- * * libbpf_num_possible_cpus()`.
+ * @param value_sz see **bpf_map__lookup_elem**'s description of *value_sz*.
 * @flags extra flags passed to kernel for this operation
 * @return 0, on success; negative error, otherwise
 *
--
2.50.1

Add test coverage for the new BPF_F_CPU flag support in percpu maps.
The following APIs are exercised:

* bpf_map_update_batch()
* bpf_map_lookup_batch()
* bpf_map_update_elem()
* bpf_map__update_elem()
* bpf_map_lookup_elem_flags()
* bpf_map__lookup_elem()

cd tools/testing/selftests/bpf/
./test_progs -t percpu_alloc
253/13  percpu_alloc/cpu_flag_percpu_array:OK
253/14  percpu_alloc/cpu_flag_percpu_hash:OK
253/15  percpu_alloc/cpu_flag_lru_percpu_hash:OK
253/16  percpu_alloc/cpu_flag_percpu_cgroup_storage:OK
253     percpu_alloc:OK
Summary: 1/16 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Leon Hwang
---
 .../selftests/bpf/prog_tests/percpu_alloc.c  | 224 ++++++++++++++++++
 .../selftests/bpf/progs/percpu_alloc_array.c |  32 +++
 2 files changed, 256 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c
index 343da65864d6d..98b6e8cc7ae60 100644
--- a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include "cgroup_helpers.h"
 #include "percpu_alloc_array.skel.h"
 #include "percpu_alloc_cgrp_local_storage.skel.h"
 #include "percpu_alloc_fail.skel.h"
@@ -115,6 +116,221 @@ static void test_failure(void)
 {
 	RUN_TESTS(percpu_alloc_fail);
 }
 
+static void test_percpu_map_op_cpu_flag(struct bpf_map *map, void *keys, size_t key_sz,
+					u32 max_entries, bool test_batch)
+{
+	int i, j, cpu, map_fd, value_size, nr_cpus, err;
+	u64 *values = NULL, batch = 0, flags;
+	const u64 value = 0xDEADC0DE;
+	size_t value_sz = sizeof(u64);
+	u32 count;
+	LIBBPF_OPTS(bpf_map_batch_opts, batch_opts);
+
+	nr_cpus = libbpf_num_possible_cpus();
+	if (!ASSERT_GT(nr_cpus, 0, "libbpf_num_possible_cpus"))
+		return;
+
+	value_size = value_sz * nr_cpus;
+	values = calloc(max_entries, value_size);
+	if (!ASSERT_OK_PTR(values, "calloc values"))
+		goto out;
+	memset(values, 0, value_size * max_entries);
+
+	map_fd = bpf_map__fd(map);
+	flags = (u64)nr_cpus << 32 | BPF_F_CPU;
+	err = bpf_map_update_elem(map_fd, keys, values, flags);
+	if (!ASSERT_EQ(err, -ERANGE, "bpf_map_update_elem_opts -ERANGE"))
+		goto out;
+
+	err = bpf_map__update_elem(map, keys, key_sz, values, value_sz, flags);
+	if (!ASSERT_EQ(err, -ERANGE, "bpf_map__update_elem_opts -ERANGE"))
+		goto out;
+
+	err = bpf_map_lookup_elem_flags(map_fd, keys, values, flags);
+	if (!ASSERT_EQ(err, -ERANGE, "bpf_map_lookup_elem_opts -ERANGE"))
+		goto out;
+
+	err = bpf_map__lookup_elem(map, keys, key_sz, values, value_sz, flags);
+	if (!ASSERT_EQ(err, -ERANGE, "bpf_map__lookup_elem_opts -ERANGE"))
+		goto out;
+
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		/* clear value on all cpus */
+		values[0] = 0;
+		flags = BPF_F_ALL_CPUS;
+		for (i = 0; i < max_entries; i++) {
+			err = bpf_map__update_elem(map, keys + i * key_sz, key_sz, values,
+						   value_sz, flags);
+			if (!ASSERT_OK(err, "bpf_map__update_elem"))
+				goto out;
+		}
+
+		/* update value on specified cpu */
+		for (i = 0; i < max_entries; i++) {
+			values[0] = value;
+			flags = (u64)cpu << 32 | BPF_F_CPU;
+			err = bpf_map__update_elem(map, keys + i * key_sz, key_sz, values,
+						   value_sz, flags);
+			if (!ASSERT_OK(err, "bpf_map__update_elem specified cpu"))
+				goto out;
+
+			/* lookup then check value on CPUs */
+			for (j = 0; j < nr_cpus; j++) {
+				flags = (u64)j << 32 | BPF_F_CPU;
+				err = bpf_map__lookup_elem(map, keys + i * key_sz, key_sz, values,
+							   value_sz, flags);
+				if (!ASSERT_OK(err, "bpf_map__lookup_elem specified cpu"))
+					goto out;
+				if (!ASSERT_EQ(values[0], j != cpu ? 0 : value,
+					       "bpf_map__lookup_elem value on specified cpu"))
+					goto out;
+			}
+		}
+	}
+
+	if (!test_batch)
+		goto out;
+
+	batch_opts.elem_flags = (u64)nr_cpus << 32 | BPF_F_CPU;
+	err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &batch_opts);
+	if (!ASSERT_EQ(err, -ERANGE, "bpf_map_update_batch -ERANGE"))
+		goto out;
+
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		memset(values, 0, max_entries * value_size);
+
+		/* clear values across all CPUs */
+		batch_opts.elem_flags = BPF_F_ALL_CPUS;
+		err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &batch_opts);
+		if (!ASSERT_OK(err, "bpf_map_update_batch all cpus"))
+			goto out;
+
+		/* update values on specified CPU */
+		for (i = 0; i < max_entries; i++)
+			values[i] = value;
+
+		batch_opts.elem_flags = (u64)cpu << 32 | BPF_F_CPU;
+		err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &batch_opts);
+		if (!ASSERT_OK(err, "bpf_map_update_batch specified cpu"))
+			goto out;
+
+		/* lookup values on specified CPU */
+		memset(values, 0, max_entries * value_sz);
+		err = bpf_map_lookup_batch(map_fd, NULL, &batch, keys, values, &count, &batch_opts);
+		if (!ASSERT_TRUE(!err || err == -ENOENT, "bpf_map_lookup_batch specified cpu"))
+			goto out;
+
+		for (i = 0; i < max_entries; i++)
+			if (!ASSERT_EQ(values[i], value, "value on specified cpu"))
+				goto out;
+
+		/* lookup values from all CPUs */
+		batch_opts.elem_flags = 0;
+		memset(values, 0, max_entries * value_size);
+		err = bpf_map_lookup_batch(map_fd, NULL, &batch, keys, values, &count, &batch_opts);
+		if (!ASSERT_TRUE(!err || err == -ENOENT, "bpf_map_lookup_batch all cpus"))
+			goto out;
+
+		for (i = 0; i < max_entries; i++) {
+			for (j = 0; j < nr_cpus; j++) {
+				if (!ASSERT_EQ(values[i*nr_cpus + j], j != cpu ? 0 : value,
+					       "value on specified cpu"))
+					goto out;
+			}
+		}
+	}
+
+out:
+	if (values)
+		free(values);
+}
+
+static void test_percpu_map_cpu_flag(enum bpf_map_type map_type)
+{
+	struct percpu_alloc_array *skel;
+	size_t key_sz = sizeof(int);
+	int *keys = NULL, i, err;
+	struct bpf_map *map;
+	u32 max_entries;
+
+	skel = percpu_alloc_array__open();
+	if (!ASSERT_OK_PTR(skel, "percpu_alloc_array__open"))
+		return;
+
+	map = skel->maps.percpu;
+	bpf_map__set_type(map, map_type);
+
+	err = percpu_alloc_array__load(skel);
+	if (!ASSERT_OK(err, "test_percpu_alloc__load"))
+		goto out;
+
+	max_entries = bpf_map__max_entries(map);
+	keys = calloc(max_entries, key_sz);
+	if (!ASSERT_OK_PTR(keys, "calloc keys"))
+		goto out;
+
+	for (i = 0; i < max_entries; i++)
+		keys[i] = i;
+
+	test_percpu_map_op_cpu_flag(map, keys, key_sz, max_entries, true);
+out:
+	if (keys)
+		free(keys);
+	percpu_alloc_array__destroy(skel);
+}
+
+static void test_percpu_array_cpu_flag(void)
+{
+	test_percpu_map_cpu_flag(BPF_MAP_TYPE_PERCPU_ARRAY);
+}
+
+static void test_percpu_hash_cpu_flag(void)
+{
+	test_percpu_map_cpu_flag(BPF_MAP_TYPE_PERCPU_HASH);
+}
+
+static void test_lru_percpu_hash_cpu_flag(void)
+{
+	test_percpu_map_cpu_flag(BPF_MAP_TYPE_LRU_PERCPU_HASH);
+}
+
+static void test_percpu_cgroup_storage_cpu_flag(void)
+{
+	struct bpf_cgroup_storage_key key;
+	struct percpu_alloc_array *skel;
+	int cgroup = -1, prog_fd, err;
+	struct bpf_map *map;
+
+	skel = percpu_alloc_array__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "percpu_alloc_array__open_and_load"))
+		return;
+
+	cgroup = create_and_get_cgroup("/cg_percpu");
+	if (!ASSERT_GE(cgroup, 0, "create_and_get_cgroup"))
+		goto out;
+
+	err = join_cgroup("/cg_percpu");
+	if (!ASSERT_OK(err, "join_cgroup"))
+		goto out;
+
+	prog_fd = bpf_program__fd(skel->progs.cgroup_egress);
+	err = bpf_prog_attach(prog_fd, cgroup, BPF_CGROUP_INET_EGRESS, 0);
+	if (!ASSERT_OK(err, "bpf_prog_attach"))
+		goto out;
+
+	map = skel->maps.percpu_cgroup_storage;
+	err = bpf_map_get_next_key(bpf_map__fd(map), NULL, &key);
+	if (!ASSERT_OK(err, "bpf_map_get_next_key"))
+		goto out;
+
+	test_percpu_map_op_cpu_flag(map, &key, sizeof(key), 1, false);
+out:
+	bpf_prog_detach2(-1, cgroup, BPF_CGROUP_INET_EGRESS);
+	close(cgroup);
+	cleanup_cgroup_environment();
+	percpu_alloc_array__destroy(skel);
+}
+
 void test_percpu_alloc(void)
 {
 	if (test__start_subtest("array"))
@@ -125,4 +341,12 @@ void test_percpu_alloc(void)
 		test_cgrp_local_storage();
 	if (test__start_subtest("failure_tests"))
 		test_failure();
+	if (test__start_subtest("cpu_flag_percpu_array"))
+		test_percpu_array_cpu_flag();
+	if (test__start_subtest("cpu_flag_percpu_hash"))
+		test_percpu_hash_cpu_flag();
+	if (test__start_subtest("cpu_flag_lru_percpu_hash"))
+		test_lru_percpu_hash_cpu_flag();
+	if (test__start_subtest("cpu_flag_percpu_cgroup_storage"))
+		test_percpu_cgroup_storage_cpu_flag();
 }

diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_array.c b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c
index 37c2d2608ec0b..427301909c349 100644
--- a/tools/testing/selftests/bpf/progs/percpu_alloc_array.c
+++ b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c
@@ -187,4 +187,36 @@ int BPF_PROG(test_array_map_10)
 	return 0;
 }
 
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 2);
+	__type(key, int);
+	__type(value, u64);
+} percpu SEC(".maps");
+
+SEC("?fentry/bpf_fentry_test1")
+int BPF_PROG(test_percpu_array, int x)
+{
+	u64 value = 0xDEADC0DE;
+	int key = 0;
+
+	bpf_map_update_elem(&percpu, &key, &value, BPF_ANY);
+	return 0;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
+	__type(key, struct bpf_cgroup_storage_key);
+	__type(value, u64);
+} percpu_cgroup_storage SEC(".maps");
+
+SEC("cgroup_skb/egress")
+int cgroup_egress(struct __sk_buff *skb)
+{
+	u64 *val = bpf_get_local_storage(&percpu_cgroup_storage, 0);
+
+	__sync_fetch_and_add(val, 1);
+	return 1;
+}
+
 char _license[] SEC("license") = "GPL";
--
2.50.1
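A closing note for reviewers (illustrative sketch, not part of any patch
above): the user-visible sizing contract of the series is that the value
buffer shrinks from one 8-byte-aligned slot per possible CPU to a single
slot once BPF_F_CPU or BPF_F_ALL_CPUS is used. Assuming a u64 map value
and an existing 'map_fd' and 'key':

	int nr_cpus = libbpf_num_possible_cpus();

	/* no cpu flags: one round_up(value_size, 8) slot per possible CPU */
	__u64 *per_cpu_buf = calloc(nr_cpus, sizeof(__u64));
	bpf_map_lookup_elem_flags(map_fd, &key, per_cpu_buf, 0);

	/* BPF_F_CPU: a single slot, read from CPU 2 only */
	__u64 one;
	bpf_map_lookup_elem_flags(map_fd, &key, &one, (__u64)2 << 32 | BPF_F_CPU);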