The current implementation uses `idreg_debugfs_iter` in `struct kvm_arch` to track the sequence position. This effectively makes the iterator shared across all open file descriptors for the VM. This approach has significant drawbacks: - It enforces mutual exclusion, preventing concurrent reads of the debugfs file (returning -EBUSY). - It relies on storing transient iterator state in the long-lived VM structure (`kvm_arch`). - The use of `u8` for the iterator index imposes an implicit limit of 255 registers. While not currently exceeded, this is fragile against future architectural growth. Switching to `loff_t` eliminates this overflow risk. Refactor the implementation to use the standard `seq_file` iterator. Instead of storing state in `kvm_arch`, rely on the `pos` argument passed to the `start` and `next` callbacks, which tracks the logical index specific to the file descriptor. This change enables concurrent access and eliminates the `idreg_debugfs_iter` field from `struct kvm_arch`. Signed-off-by: Fuad Tabba --- arch/arm64/include/asm/kvm_host.h | 3 -- arch/arm64/kvm/sys_regs.c | 50 +++++-------------------------- 2 files changed, 8 insertions(+), 45 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ac7f970c7883..643a199f6e9e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -373,9 +373,6 @@ struct kvm_arch { /* Maximum number of counters for the guest */ u8 nr_pmu_counters; - /* Iterator for idreg debugfs */ - u8 idreg_debugfs_iter; - /* Hypercall features firmware registers' descriptor */ struct kvm_smccc_features smccc_feat; struct maple_tree smccc_filter; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 88a57ca36d96..035bf049e2e4 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -4995,7 +4995,7 @@ static bool emulate_sys_reg(struct kvm_vcpu *vcpu, return false; } -static const struct sys_reg_desc *idregs_debug_find(struct kvm *kvm, u8 pos) +static const struct sys_reg_desc *idregs_debug_find(struct kvm *kvm, loff_t pos) { unsigned long i, idreg_idx = 0; @@ -5005,10 +5005,8 @@ static const struct sys_reg_desc *idregs_debug_find(struct kvm *kvm, u8 pos) if (!is_vm_ftr_id_reg(reg_to_encoding(r))) continue; - if (idreg_idx == pos) + if (idreg_idx++ == pos) return r; - - idreg_idx++; } return NULL; @@ -5017,23 +5015,11 @@ static const struct sys_reg_desc *idregs_debug_find(struct kvm *kvm, u8 pos) static void *idregs_debug_start(struct seq_file *s, loff_t *pos) { struct kvm *kvm = s->private; - u8 *iter; - mutex_lock(&kvm->arch.config_lock); + if (!test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags)) + return NULL; - iter = &kvm->arch.idreg_debugfs_iter; - if (test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags) && - *iter == (u8)~0) { - *iter = *pos; - if (!idregs_debug_find(kvm, *iter)) - iter = NULL; - } else { - iter = ERR_PTR(-EBUSY); - } - - mutex_unlock(&kvm->arch.config_lock); - - return iter; + return (void *)idregs_debug_find(kvm, *pos); } static void *idregs_debug_next(struct seq_file *s, void *v, loff_t *pos) @@ -5042,37 +5028,19 @@ static void *idregs_debug_next(struct seq_file *s, void *v, loff_t *pos) (*pos)++; - if (idregs_debug_find(kvm, kvm->arch.idreg_debugfs_iter + 1)) { - kvm->arch.idreg_debugfs_iter++; - - return &kvm->arch.idreg_debugfs_iter; - } - - return NULL; + return (void *)idregs_debug_find(kvm, *pos); } static void idregs_debug_stop(struct seq_file *s, void *v) { - struct kvm *kvm = s->private; - - if (IS_ERR(v)) - return; - - mutex_lock(&kvm->arch.config_lock); - - kvm->arch.idreg_debugfs_iter = ~0; - - mutex_unlock(&kvm->arch.config_lock); } static int idregs_debug_show(struct seq_file *s, void *v) { - const struct sys_reg_desc *desc; + const struct sys_reg_desc *desc = v; struct kvm *kvm = s->private; - desc = idregs_debug_find(kvm, kvm->arch.idreg_debugfs_iter); - - if (!desc->name) + if (!desc) return 0; seq_printf(s, "%20s:\t%016llx\n", @@ -5092,8 +5060,6 @@ DEFINE_SEQ_ATTRIBUTE(idregs_debug); void kvm_sys_regs_create_debugfs(struct kvm *kvm) { - kvm->arch.idreg_debugfs_iter = ~0; - debugfs_create_file("idregs", 0444, kvm->debugfs_dentry, kvm, &idregs_debug_fops); } -- 2.53.0.rc1.225.gd81095ad13-goog The vgic-debug interface implementation uses XArray marks (`LPI_XA_MARK_DEBUG_ITER`) to "snapshot" LPIs at the start of iteration. This modifies global state for a read-only operation and complicates reference counting, leading to leaks if iteration is aborted or fails. Reimplement the iterator to use dynamic iteration logic: - Remove `lpi_idx` from `struct vgic_state_iter`. - Replace the XArray marking mechanism with dynamic iteration using `xa_find_after(..., XA_PRESENT)`. - Wrap XArray traversals in `rcu_read_lock()`/`rcu_read_unlock()` to ensure safety against concurrent modifications (e.g., LPI unmapping). - Handle potential races where an LPI is removed during iteration by gracefully skipping it in `show()`, rather than warning. - Remove the unused `LPI_XA_MARK_DEBUG_ITER` definition. This simplifies the lifecycle management of the iterator and prevents resource leaks associated with the marking mechanism, and paves the way for using a standard seq_file iterator. Signed-off-by: Fuad Tabba --- arch/arm64/kvm/vgic/vgic-debug.c | 68 ++++++++++---------------------- include/kvm/arm_vgic.h | 1 - 2 files changed, 20 insertions(+), 49 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index bb92853d1fd3..ec3d0c1fe703 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -25,11 +25,9 @@ struct vgic_state_iter { int nr_cpus; int nr_spis; - int nr_lpis; int dist_id; int vcpu_id; unsigned long intid; - int lpi_idx; }; static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter) @@ -45,13 +43,15 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter) * Let the xarray drive the iterator after the last SPI, as the iterator * has exhausted the sequentially-allocated INTID space. */ - if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1) && - iter->nr_lpis) { - if (iter->lpi_idx < iter->nr_lpis) - xa_find_after(&dist->lpi_xa, &iter->intid, - VGIC_LPI_MAX_INTID, - LPI_XA_MARK_DEBUG_ITER); - iter->lpi_idx++; + if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1)) { + if (iter->intid == VGIC_LPI_MAX_INTID + 1) + return; + + rcu_read_lock(); + if (!xa_find_after(&dist->lpi_xa, &iter->intid, + VGIC_LPI_MAX_INTID, XA_PRESENT)) + iter->intid = VGIC_LPI_MAX_INTID + 1; + rcu_read_unlock(); return; } @@ -61,44 +61,21 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter) iter->intid = 0; } -static int iter_mark_lpis(struct kvm *kvm) +static int vgic_count_lpis(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - unsigned long intid, flags; struct vgic_irq *irq; + unsigned long intid; int nr_lpis = 0; - xa_lock_irqsave(&dist->lpi_xa, flags); - - xa_for_each(&dist->lpi_xa, intid, irq) { - if (!vgic_try_get_irq_ref(irq)) - continue; - - __xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); + rcu_read_lock(); + xa_for_each(&dist->lpi_xa, intid, irq) nr_lpis++; - } - - xa_unlock_irqrestore(&dist->lpi_xa, flags); + rcu_read_unlock(); return nr_lpis; } -static void iter_unmark_lpis(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - unsigned long intid, flags; - struct vgic_irq *irq; - - xa_for_each_marked(&dist->lpi_xa, intid, irq, LPI_XA_MARK_DEBUG_ITER) { - xa_lock_irqsave(&dist->lpi_xa, flags); - __xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); - xa_unlock_irqrestore(&dist->lpi_xa, flags); - - /* vgic_put_irq() expects to be called outside of the xa_lock */ - vgic_put_irq(kvm, irq); - } -} - static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter, loff_t pos) { @@ -108,8 +85,6 @@ static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter, iter->nr_cpus = nr_cpus; iter->nr_spis = kvm->arch.vgic.nr_spis; - if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) - iter->nr_lpis = iter_mark_lpis(kvm); /* Fast forward to the right position if needed */ while (pos--) @@ -121,7 +96,7 @@ static bool end_of_vgic(struct vgic_state_iter *iter) return iter->dist_id > 0 && iter->vcpu_id == iter->nr_cpus && iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) && - (!iter->nr_lpis || iter->lpi_idx > iter->nr_lpis); + iter->intid > VGIC_LPI_MAX_INTID; } static void *vgic_debug_start(struct seq_file *s, loff_t *pos) @@ -178,7 +153,6 @@ static void vgic_debug_stop(struct seq_file *s, void *v) mutex_lock(&kvm->arch.config_lock); iter = kvm->arch.vgic.iter; - iter_unmark_lpis(kvm); kfree(iter); kvm->arch.vgic.iter = NULL; mutex_unlock(&kvm->arch.config_lock); @@ -188,13 +162,14 @@ static void print_dist_state(struct seq_file *s, struct vgic_dist *dist, struct vgic_state_iter *iter) { bool v3 = dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3; + struct kvm *kvm = s->private; seq_printf(s, "Distributor\n"); seq_printf(s, "===========\n"); seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2"); seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis); if (v3) - seq_printf(s, "nr_lpis:\t%d\n", iter->nr_lpis); + seq_printf(s, "nr_lpis:\t%d\n", vgic_count_lpis(kvm)); seq_printf(s, "enabled:\t%d\n", dist->enabled); seq_printf(s, "\n"); @@ -291,16 +266,13 @@ static int vgic_debug_show(struct seq_file *s, void *v) if (iter->vcpu_id < iter->nr_cpus) vcpu = kvm_get_vcpu(kvm, iter->vcpu_id); - /* - * Expect this to succeed, as iter_mark_lpis() takes a reference on - * every LPI to be visited. - */ if (iter->intid < VGIC_NR_PRIVATE_IRQS) irq = vgic_get_vcpu_irq(vcpu, iter->intid); else irq = vgic_get_irq(kvm, iter->intid); - if (WARN_ON_ONCE(!irq)) - return -EINVAL; + + if (!irq) + return 0; raw_spin_lock_irqsave(&irq->irq_lock, flags); print_irq_state(s, irq, vcpu); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index b261fb3968d0..d32fafbd2907 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -300,7 +300,6 @@ struct vgic_dist { */ u64 propbaser; -#define LPI_XA_MARK_DEBUG_ITER XA_MARK_0 struct xarray lpi_xa; /* used by vgic-debug */ -- 2.53.0.rc1.225.gd81095ad13-goog The current implementation uses `vgic_state_iter` in `struct vgic_dist` to track the sequence position. This effectively makes the iterator shared across all open file descriptors for the VM. This approach has significant drawbacks: - It enforces mutual exclusion, preventing concurrent reads of the debugfs file (returning -EBUSY). - It relies on storing transient iterator state in the long-lived VM structure (`vgic_dist`). Refactor the implementation to use the standard `seq_file` iterator. Instead of storing state in `kvm_arch`, rely on the `pos` argument passed to the `start` and `next` callbacks, which tracks the logical index specific to the file descriptor. This change enables concurrent access and eliminates the `vgic_state_iter` field from `struct vgic_dist`. Signed-off-by: Fuad Tabba --- arch/arm64/kvm/vgic/vgic-debug.c | 40 ++++++++++---------------------- include/kvm/arm_vgic.h | 3 --- 2 files changed, 12 insertions(+), 31 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index ec3d0c1fe703..2c6776a1779b 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -104,58 +104,42 @@ static void *vgic_debug_start(struct seq_file *s, loff_t *pos) struct kvm *kvm = s->private; struct vgic_state_iter *iter; - mutex_lock(&kvm->arch.config_lock); - iter = kvm->arch.vgic.iter; - if (iter) { - iter = ERR_PTR(-EBUSY); - goto out; - } - iter = kmalloc(sizeof(*iter), GFP_KERNEL); - if (!iter) { - iter = ERR_PTR(-ENOMEM); - goto out; - } + if (!iter) + return ERR_PTR(-ENOMEM); iter_init(kvm, iter, *pos); - kvm->arch.vgic.iter = iter; - if (end_of_vgic(iter)) + if (end_of_vgic(iter)) { + kfree(iter); iter = NULL; -out: - mutex_unlock(&kvm->arch.config_lock); + } + return iter; } static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos) { struct kvm *kvm = s->private; - struct vgic_state_iter *iter = kvm->arch.vgic.iter; + struct vgic_state_iter *iter = v; ++*pos; iter_next(kvm, iter); - if (end_of_vgic(iter)) + if (end_of_vgic(iter)) { + kfree(iter); iter = NULL; + } return iter; } static void vgic_debug_stop(struct seq_file *s, void *v) { - struct kvm *kvm = s->private; - struct vgic_state_iter *iter; + struct vgic_state_iter *iter = v; - /* - * If the seq file wasn't properly opened, there's nothing to clearn - * up. - */ - if (IS_ERR(v)) + if (IS_ERR_OR_NULL(v)) return; - mutex_lock(&kvm->arch.config_lock); - iter = kvm->arch.vgic.iter; kfree(iter); - kvm->arch.vgic.iter = NULL; - mutex_unlock(&kvm->arch.config_lock); } static void print_dist_state(struct seq_file *s, struct vgic_dist *dist, diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index d32fafbd2907..f2eafc65bbf4 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -302,9 +302,6 @@ struct vgic_dist { struct xarray lpi_xa; - /* used by vgic-debug */ - struct vgic_state_iter *iter; - /* * GICv4 ITS per-VM data, containing the IRQ domain, the VPE * array, the property table pointer as well as allocation -- 2.53.0.rc1.225.gd81095ad13-goog