The BYTE_MASK macro is the central point of the black magic in
update_permission_bitmask().  Rename it to something that relates to
how it is used, and add a comment explaining how it works.

Using shifts instead of powers of two was actually suggested by David
Hildenbrand back in 2017 for clarity[1] but I evidently forgot his
suggestion when applying to kvm.git.

[1] https://lore.kernel.org/kvm/e4b5df86-31ae-2f4e-0666-393753e256df@redhat.com/

Tested-by: David Riley
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/mmu/mmu.c | 63 ++++++++++++++++++++++++++++--------------
 1 file changed, 43 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 24fbc9ea502a..d94a488db79d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5529,31 +5529,55 @@ reset_ept_shadow_zero_bits_mask(struct kvm_mmu *context, bool execonly)
 			max_huge_page_level);
 }
 
-#define BYTE_MASK(access) \
-	((1 & (access) ? 2 : 0) | \
-	 (2 & (access) ? 4 : 0) | \
-	 (3 & (access) ? 8 : 0) | \
-	 (4 & (access) ? 16 : 0) | \
-	 (5 & (access) ? 32 : 0) | \
-	 (6 & (access) ? 64 : 0) | \
-	 (7 & (access) ? 128 : 0))
-
+/*
+ * Build a mask with all combinations of PTE access rights that
+ * include the given access bit.  The mask can be queried with
+ * "mask & (1 << access)", where access is a combination of
+ * ACC_* bits.
+ *
+ * By mixing and matching multiple masks returned by ACC_BITS_MASK,
+ * update_permission_bitmask() builds what is effectively a
+ * two-dimensional array of bools.  The second dimension is
+ * provided by individual bits of permissions[pfec >> 1], and
+ * logical &, | and ~ operations operate on all the 8 possible
+ * combinations of ACC_* bits.
+ */
+#define ACC_BITS_MASK(access) \
+	((1 & (access) ? 1 << 1 : 0) |	\
+	 (2 & (access) ? 1 << 2 : 0) |	\
+	 (3 & (access) ? 1 << 3 : 0) |	\
+	 (4 & (access) ? 1 << 4 : 0) |	\
+	 (5 & (access) ? 1 << 5 : 0) |	\
+	 (6 & (access) ? 1 << 6 : 0) |	\
+	 (7 & (access) ? 1 << 7 : 0))
 
 static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
 {
-	unsigned byte;
+	unsigned index;
 
-	const u8 x = BYTE_MASK(ACC_EXEC_MASK);
-	const u8 w = BYTE_MASK(ACC_WRITE_MASK);
-	const u8 u = BYTE_MASK(ACC_USER_MASK);
+	const u8 x = ACC_BITS_MASK(ACC_EXEC_MASK);
+	const u8 w = ACC_BITS_MASK(ACC_WRITE_MASK);
+	const u8 u = ACC_BITS_MASK(ACC_USER_MASK);
 
 	bool cr4_smep = is_cr4_smep(mmu);
 	bool cr4_smap = is_cr4_smap(mmu);
 	bool cr0_wp = is_cr0_wp(mmu);
 	bool efer_nx = is_efer_nx(mmu);
 
-	for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
-		unsigned pfec = byte << 1;
+	/*
+	 * In hardware, page fault error codes are generated (as the name
+	 * suggests) on any kind of page fault.  permission_fault() and
+	 * paging_tmpl.h already use the same bits after a successful page
+	 * table walk, to indicate the kind of access being performed.
+	 *
+	 * However, PFERR_PRESENT_MASK and PFERR_RSVD_MASK are never set here,
+	 * exactly because the page walk is successful.  PFERR_PRESENT_MASK is
+	 * removed by the shift, while PFERR_RSVD_MASK is repurposed in
+	 * permission_fault() to indicate accesses that are *not* subject to
+	 * SMAP restrictions.
+	 */
+	for (index = 0; index < ARRAY_SIZE(mmu->permissions); ++index) {
+		unsigned pfec = index << 1;
 
 		/*
 		 * Each "*f" variable has a 1 bit for each UWX value
@@ -5598,16 +5622,15 @@ static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
 			 *   - The access is supervisor mode
 			 *   - If implicit supervisor access or X86_EFLAGS_AC is clear
 			 *
-			 * Here, we cover the first four conditions.
-			 * The fifth is computed dynamically in permission_fault();
-			 * PFERR_RSVD_MASK bit will be set in PFEC if the access is
-			 * *not* subject to SMAP restrictions.
+			 * Here, we cover the first four conditions.  The fifth
+			 * is computed dynamically in permission_fault() and
+			 * communicated by setting PFERR_RSVD_MASK.
 			 */
 			if (cr4_smap)
 				smapf = (pfec & (PFERR_RSVD_MASK|PFERR_FETCH_MASK)) ? 0 : kf;
 		}
 
-		mmu->permissions[byte] = ff | uf | wf | smepf | smapf;
+		mmu->permissions[index] = ff | uf | wf | smepf | smapf;
 	}
 }
 
-- 
2.54.0
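
P.S. For anyone who wants to see the renamed macro in action without
building the kernel, here is a throwaway userspace sketch (not part of
the patch).  It expands ACC_BITS_MASK and queries the resulting bitmap
the same way the new comment describes.  The ACC_* and PFERR_* values
mirror their kernel definitions; the loop body only implements the
EPT-style subset of update_permission_bitmask() (no NX, CR0.WP, SMEP
or SMAP handling), so treat it as an illustration rather than the real
kernel logic:

/* accperm.c: standalone demo, NOT kernel code.  Constants below mirror
 * arch/x86/kvm/mmu.h and arch/x86/include/asm/kvm_host.h. */
#include <stdio.h>
#include <stdint.h>

#define ACC_EXEC_MASK		1
#define ACC_WRITE_MASK		2
#define ACC_USER_MASK		4

#define PFERR_WRITE_MASK	(1u << 1)
#define PFERR_USER_MASK		(1u << 2)
#define PFERR_FETCH_MASK	(1u << 4)

/* Same macro as in the patch: bit i of the result is set iff the
 * access-rights combination i (an OR of ACC_* bits) includes "access". */
#define ACC_BITS_MASK(access) \
	((1 & (access) ? 1 << 1 : 0) |	\
	 (2 & (access) ? 1 << 2 : 0) |	\
	 (3 & (access) ? 1 << 3 : 0) |	\
	 (4 & (access) ? 1 << 4 : 0) |	\
	 (5 & (access) ? 1 << 5 : 0) |	\
	 (6 & (access) ? 1 << 6 : 0) |	\
	 (7 & (access) ? 1 << 7 : 0))

int main(void)
{
	const uint8_t x = ACC_BITS_MASK(ACC_EXEC_MASK);	 /* 0xaa */
	const uint8_t w = ACC_BITS_MASK(ACC_WRITE_MASK); /* 0xcc */
	const uint8_t u = ACC_BITS_MASK(ACC_USER_MASK);	 /* 0xf0 */

	/* Example PFEC: a user-mode write. */
	unsigned int pfec = PFERR_USER_MASK | PFERR_WRITE_MASK;
	unsigned int pte_access;

	/* EPT-style subset of update_permission_bitmask(): each ~mask
	 * selects the access combinations that *lack* the needed right. */
	uint8_t wf = (pfec & PFERR_WRITE_MASK) ? (uint8_t)~w : 0;
	uint8_t uf = (pfec & PFERR_USER_MASK)  ? (uint8_t)~u : 0;
	uint8_t ff = (pfec & PFERR_FETCH_MASK) ? (uint8_t)~x : 0;
	uint8_t perm = ff | uf | wf;	/* the permissions[pfec >> 1] entry */

	printf("x=%#x w=%#x u=%#x perm=%#x\n", x, w, u, perm);

	/* Query one bit per ACC_* combination, as described in the
	 * comment above ACC_BITS_MASK. */
	for (pte_access = 0; pte_access < 8; pte_access++)
		printf("pte_access=%u -> %s\n", pte_access,
		       (perm >> pte_access) & 1 ? "fault" : "ok");
	return 0;
}

For this user-mode write (pfec = 0x6) the sketch prints "fault" for
every pte_access combination missing either ACC_USER_MASK or
ACC_WRITE_MASK, and "ok" only for 6 and 7, the two bits left clear in
perm = 0x3f.  Writing the shifts out also makes the constants easy to
recognize: x, w and u come out as 0xaa, 0xcc and 0xf0, the familiar
truth-table patterns for each input of a three-variable boolean
function, which is the whole point of the rename.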