The kpkeys_hardened_pgtables feature currently switches kpkeys level in every helper that writes to page tables, such as set_pte(). With kpkeys implemented using POE, this entails a pair of ISBs whenever such a helper is called. A simple way to reduce this overhead is to make use of the lazy MMU mode. We amend the kpkeys_hardened_pgtables guard so that no level switch (i.e. POR_EL1 update) is issued while the lazy MMU mode is active. Instead, we switch to KPKEYS_LVL_PGTABLES when entering the lazy MMU mode, and restore the previous level when exiting it. Restoring the previous kpkeys level requires storing the original value of POR_EL1 somewhere. This is a full 64-bit value so we cannot simply use a TIF flag. There is no straightforward way to reuse current->thread.por_el1 for that purpose: that member is where the current value of POR_EL1 is stored on a context switch, which inside a lazy MMU section is the value corresponding to KPKEYS_LVL_PGTABLES rather than the original one. Instead, we add a new member to thread_struct to hold the original value temporarily. This isn't optimal as that member is unused outside of lazy MMU sections, but it is the simplest option. Nesting of sections is not a concern as arch_{enter,leave}_lazy_mmu_mode() are not called in inner sections (nor do we need to do anything there). A further optimisation this patch makes is to merge the ISBs when exiting the lazy MMU mode. That is, if an ISB is going to be issued by emit_pte_barriers() because kernel pgtables were modified in the lazy MMU section, we skip the ISB after restoring POR_EL1. This is done by checking TIF_LAZY_MMU_PENDING and ensuring that POR_EL1 is restored before emit_pte_barriers() is called. 
Signed-off-by: Kevin Brodsky --- arch/arm64/include/asm/pgtable.h | 50 +++++++++++++++++++++++++++--- arch/arm64/include/asm/processor.h | 1 + 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 8c85e23223da..556de0a4537e 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -43,10 +43,44 @@ #ifdef CONFIG_KPKEYS_HARDENED_PGTABLES KPKEYS_GUARD_COND(kpkeys_hardened_pgtables, KPKEYS_LVL_PGTABLES, - kpkeys_hardened_pgtables_enabled()) -#else + kpkeys_hardened_pgtables_enabled() && + !is_lazy_mmu_mode_active()) + +static void kpkeys_lazy_mmu_enter(void) +{ + if (!kpkeys_hardened_pgtables_enabled()) + return; + + current->thread.por_el1_lazy_mmu = kpkeys_set_level(KPKEYS_LVL_PGTABLES); +} + +static void kpkeys_lazy_mmu_exit(void) +{ + u64 saved_por_el1; + + if (!kpkeys_hardened_pgtables_enabled()) + return; + + saved_por_el1 = current->thread.por_el1_lazy_mmu; + + /* + * We skip any barrier if TIF_LAZY_MMU_PENDING is set: + * emit_pte_barriers() will issue an ISB just after this function + * returns. 
+ */ + if (test_thread_flag(TIF_LAZY_MMU_PENDING)) + __kpkeys_set_pkey_reg_nosync(saved_por_el1); + else + arch_kpkeys_restore_pkey_reg(saved_por_el1); +} +#else /* CONFIG_KPKEYS_HARDENED_PGTABLES */ KPKEYS_GUARD_NOOP(kpkeys_hardened_pgtables) -#endif + +static void kpkeys_lazy_mmu_enter(void) {} +static void kpkeys_lazy_mmu_exit(void) {} +#endif /* CONFIG_KPKEYS_HARDENED_PGTABLES */ + + static inline void emit_pte_barriers(void) { @@ -79,7 +113,10 @@ static inline void queue_pte_barriers(void) } } -static inline void arch_enter_lazy_mmu_mode(void) {} +static inline void arch_enter_lazy_mmu_mode(void) +{ + kpkeys_lazy_mmu_enter(); +} static inline void arch_flush_lazy_mmu_mode(void) { @@ -89,6 +126,11 @@ static inline void arch_flush_lazy_mmu_mode(void) static inline void arch_leave_lazy_mmu_mode(void) { + /* + * The ordering should be preserved to allow kpkeys_lazy_mmu_exit() + * to skip any barrier when TIF_LAZY_MMU_PENDING is set. + */ + kpkeys_lazy_mmu_exit(); arch_flush_lazy_mmu_mode(); } diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 6095322343fc..c3a86ddce637 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -193,6 +193,7 @@ struct thread_struct { u64 tpidr2_el0; u64 por_el0; u64 por_el1; + u64 por_el1_lazy_mmu; #ifdef CONFIG_ARM64_GCS unsigned int gcs_el0_mode; unsigned int gcs_el0_locked; -- 2.51.2