From: Mykyta Yatsenko

Add batch operations and BPF iterator support for BPF_MAP_TYPE_RHASH.

Batch operations:
 * rhtab_map_lookup_batch: Bulk lookup of elements by bucket
 * rhtab_map_lookup_and_delete_batch: Atomic bulk lookup and delete

The batch implementation iterates through buckets under RCU protection,
copying keys and values to userspace buffers. When the buffer fills
mid-bucket, it rolls back to the bucket boundary so the next call can
retry that bucket completely.

BPF iterator:
 * Uses the rhashtable_walk_* API for safe iteration
 * Handles -EAGAIN during table resize transparently
 * Tracks skip_elems to resume iteration across read() calls

Also implements rhtab_map_mem_usage() to report memory consumption.

Signed-off-by: Mykyta Yatsenko
---
 kernel/bpf/hashtab.c | 227 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 221 insertions(+), 6 deletions(-)

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 917f8f7d70c9ce265c13751e17e3b0bb4b598c45..142e5cf3be17c74d29ef1623be4ed3923c97dd2e 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -3065,19 +3065,159 @@ static long bpf_each_rhash_elem(struct bpf_map *map, bpf_callback_t callback_fn,
 static u64 rhtab_map_mem_usage(const struct bpf_map *map)
 {
-	return 0;
+	struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map);
+	struct bucket_table *tbl, *future_tbl;
+	u64 num_entries;
+	u64 usage = sizeof(struct bpf_rhtab);
+
+	num_entries = atomic_read(&rhtab->ht.nelems);
+	usage += rhtab->elem_size * num_entries;
+
+	/* Add rhashtable internal overhead: bucket_table + buckets array */
+	guard(rcu)();
+	tbl = rcu_dereference(rhtab->ht.tbl);
+	usage += sizeof(struct bucket_table) +
+		 sizeof(struct rhash_lock_head *) * tbl->size;
+
+	/* Account for future_tbl during resize */
+	future_tbl = rcu_dereference(tbl->future_tbl);
+	if (future_tbl)
+		usage += sizeof(struct bucket_table) +
+			 sizeof(struct rhash_lock_head *) * future_tbl->size;
+
+	return usage;
+}
+
+static int __rhtab_map_lookup_and_delete_batch(struct bpf_map *map,
+					       const union bpf_attr *attr,
+					       union bpf_attr __user *uattr,
+					       bool do_delete)
+{
+	struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map);
+	void __user *uvalues = u64_to_user_ptr(attr->batch.values);
+	void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
+	void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
+	void *keys = NULL, *values = NULL, *dst_key, *dst_val;
+	struct rhtab_elem **del_elems = NULL;
+	u32 slot, max_count, total, key_size, value_size, i;
+	struct bucket_table *tbl;
+	struct rhtab_elem *elem;
+	struct rhash_head *he;
+	u64 elem_map_flags, map_flags;
+	int ret = 0;
+
+	elem_map_flags = attr->batch.elem_flags;
+	if ((elem_map_flags & ~BPF_F_LOCK) ||
+	    ((elem_map_flags & BPF_F_LOCK) &&
+	     !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
+		return -EINVAL;
+
+	map_flags = attr->batch.flags;
+	if (map_flags)
+		return -EINVAL;
+
+	max_count = attr->batch.count;
+	if (!max_count)
+		return 0;
+
+	if (put_user(0, &uattr->batch.count))
+		return -EFAULT;
+
+	slot = 0;
+	if (ubatch && copy_from_user(&slot, ubatch, sizeof(slot)))
+		return -EFAULT;
+
+	key_size = map->key_size;
+	value_size = map->value_size;
+
+	keys = kvmalloc_array(max_count, key_size, GFP_USER | __GFP_NOWARN);
+	values = kvmalloc_array(max_count, value_size, GFP_USER | __GFP_NOWARN);
+	if (do_delete)
+		del_elems = kvmalloc_array(max_count, sizeof(void *), GFP_USER | __GFP_NOWARN);
+
+	if (!keys || !values || (do_delete && !del_elems)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	dst_key = keys;
+	dst_val = values;
+	total = 0;
+
+	rcu_read_lock();
+	tbl = rht_dereference_rcu(rhtab->ht.tbl, &rhtab->ht);
+
+	if (slot >= tbl->size) {
+		rcu_read_unlock();
+		ret = -ENOENT;
+		goto out;
+	}
+
+	for (; slot < tbl->size; slot++) {
+		u32 bucket_start = total;
+
+		rht_for_each_rcu(he, tbl, slot) {
+			/*
+			 * If the buffer fills mid-bucket, undo this partial bucket so
+			 * the next call retries it. If this is the first bucket, return -ENOSPC.
+			 */
+			if (total >= max_count) {
+				if (bucket_start == 0)
+					ret = -ENOSPC;
+				dst_key -= (total - bucket_start) * key_size;
+				dst_val -= (total - bucket_start) * value_size;
+				total = bucket_start;
+				goto done;
+			}
+			elem = container_of(he, struct rhtab_elem, node);
+			memcpy(dst_key, elem->data, key_size);
+			rhtab_read_elem_value(map, dst_val, elem, elem_map_flags);
+			check_and_init_map_value(map, dst_val);
+
+			if (do_delete)
+				del_elems[total] = elem;
+
+			dst_key += key_size;
+			dst_val += value_size;
+			total++;
+		}
+	}
+done:
+
+	if (do_delete) {
+		for (i = 0; i < total; i++)
+			rhtab_delete_elem(rhtab, del_elems[i]);
+	}
+
+	rcu_read_unlock();
+
+	if (total == 0)
+		goto out;
+
+	ret = copy_to_user(ukeys, keys, total * key_size);
+	ret = ret ? : copy_to_user(uvalues, values, total * value_size);
+	ret = ret ? : put_user(total, &uattr->batch.count);
+	ret = ret ? : copy_to_user(u64_to_user_ptr(attr->batch.out_batch), &slot, sizeof(slot));
+	if (ret)
+		ret = -EFAULT;
+
+out:
+	kvfree(keys);
+	kvfree(values);
+	kvfree(del_elems);
+	return ret;
 }
 
 static int rhtab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
 				  union bpf_attr __user *uattr)
 {
-	return 0;
+	return __rhtab_map_lookup_and_delete_batch(map, attr, uattr, false);
 }
 
 static int rhtab_map_lookup_and_delete_batch(struct bpf_map *map,
 					     const union bpf_attr *attr,
 					     union bpf_attr __user *uattr)
 {
-	return 0;
+	return __rhtab_map_lookup_and_delete_batch(map, attr, uattr, true);
 }
 
 struct bpf_iter_seq_rhash_map_info {
@@ -3090,30 +3230,105 @@ struct bpf_iter_seq_rhash_map_info {
 static void *bpf_rhash_map_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	return NULL;
+	struct bpf_iter_seq_rhash_map_info *info = seq->private;
+	struct rhtab_elem *elem;
+	u32 skip = info->skip_elems;
+
+	rhashtable_walk_enter(&info->rhtab->ht, &info->iter);
+	rhashtable_walk_start(&info->iter);
+	info->iter_active = true;
+
+	/* Skip elements we've already processed in previous read calls */
+	while (skip > 0) {
+		elem = rhtab_iter_next(&info->iter);
+		if (!elem)
+			return NULL;
+		skip--;
+	}
+
+	elem = rhtab_iter_next(&info->iter);
+	if (!elem)
+		return NULL;
+	/*
+	 * If *pos is not 0, the previous iteration stopped on this elem and
+	 * we are restarting it, so there is no need to increment *pos.
+	 */
+	if (*pos == 0)
+		++*pos;
+	return elem;
 }
 
 static void *bpf_rhash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	return NULL;
+	struct bpf_iter_seq_rhash_map_info *info = seq->private;
+
+	++*pos;
+	++info->skip_elems;
+
+	return rhtab_iter_next(&info->iter);
+}
+
+static int __bpf_rhash_map_seq_show(struct seq_file *seq,
+				    struct rhtab_elem *elem)
+{
+	struct bpf_iter_seq_rhash_map_info *info = seq->private;
+	struct bpf_iter__bpf_map_elem ctx = {};
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+	int ret = 0;
+
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, elem == NULL);
+	if (prog) {
+		ctx.meta = &meta;
+		ctx.map = info->map;
+		if (elem) {
+			ctx.key = elem->data;
+			ctx.value = rhtab_elem_value(elem);
+		}
+		ret = bpf_iter_run_prog(prog, &ctx);
+	}
+
+	return ret;
 }
 
 static int bpf_rhash_map_seq_show(struct seq_file *seq, void *v)
 {
-	return 0;
+	return __bpf_rhash_map_seq_show(seq, v);
 }
 
 static void bpf_rhash_map_seq_stop(struct seq_file *seq, void *v)
 {
+	struct bpf_iter_seq_rhash_map_info *info = seq->private;
+
+	if (!v)
+		(void)__bpf_rhash_map_seq_show(seq, NULL);
+
+	if (info->iter_active) {
+		rhashtable_walk_stop(&info->iter);
+		rhashtable_walk_exit(&info->iter);
+		info->iter_active = false;
+	}
 }
 
 static int bpf_iter_init_rhash_map(void *priv_data,
 				   struct bpf_iter_aux_info *aux)
 {
+	struct bpf_iter_seq_rhash_map_info *info = priv_data;
+	struct bpf_map *map = aux->map;
+
+	bpf_map_inc_with_uref(map);
+	info->map = map;
+	info->rhtab = container_of(map, struct bpf_rhtab, map);
+	info->skip_elems = 0;
+	info->iter_active = false;
 	return 0;
 }
 
 static void bpf_iter_fini_rhash_map(void *priv_data)
 {
+	struct bpf_iter_seq_rhash_map_info *info = priv_data;
+
+	bpf_map_put_with_uref(info->map);
 }
 
 static const struct seq_operations bpf_rhash_map_seq_ops = {
-- 
2.53.0
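
(Not part of the patch, just for context: a minimal userspace sketch of driving the new batch-lookup path through the existing libbpf bpf_map_lookup_batch() wrapper, assuming the libbpf >= 1.0 convention of negative errno return values. The map fd, the __u32 key / __u64 value layout, and the batch size of 64 are illustrative assumptions; the in/out batch cookie is the bucket slot that __rhtab_map_lookup_and_delete_batch() hands back via out_batch and is treated as opaque here. The kernel-side rollback to the bucket boundary is transparent to this loop: a batch truncated mid-bucket is simply retried from the returned slot on the next call.)

/*
 * Hypothetical test sketch: dump an RHASH map in batches of 64 via the
 * generic batch-lookup UAPI implemented by rhtab_map_lookup_batch().
 * Assumes __u32 keys and __u64 values.
 */
#include <errno.h>
#include <stdio.h>
#include <bpf/bpf.h>

static int dump_rhash_map(int map_fd)
{
	LIBBPF_OPTS(bpf_map_batch_opts, opts);
	__u32 in_batch = 0, out_batch = 0;	/* slot cookie, opaque to userspace */
	__u32 keys[64];
	__u64 values[64];
	void *in = NULL;			/* NULL on the first call: start at slot 0 */
	int err;

	do {
		__u32 count = 64;		/* in: buffer capacity, out: elements copied */

		err = bpf_map_lookup_batch(map_fd, in, &out_batch,
					   keys, values, &count, &opts);
		if (err && err != -ENOENT)
			return err;		/* -ENOSPC: a single bucket exceeds the buffer */

		for (__u32 i = 0; i < count; i++)
			printf("key %u -> value %llu\n",
			       keys[i], (unsigned long long)values[i]);

		in_batch = out_batch;
		in = &in_batch;			/* resume from the slot returned by the kernel */
	} while (!err);				/* -ENOENT: all slots visited */

	return 0;
}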