Deferring kernel range TLB flushes requires the guarantee that upon
entering the kernel, no stale entry may be accessed. The simplest way to
provide such a guarantee is to issue an unconditional flush upon switching
to the kernel CR3, as this is the pivoting point where such stale entries
may be accessed.

As this is only relevant to NOHZ_FULL, restrict the mechanism to NOHZ_FULL
CPUs.

Note that the COALESCE_TLBI config option is introduced in a later commit,
when the whole feature is implemented.

Signed-off-by: Valentin Schneider
---
 arch/x86/entry/calling.h      | 26 +++++++++++++++++++++++---
 arch/x86/kernel/asm-offsets.c |  1 +
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 813451b1ddecc..19fb6de276eac 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include
 
 /*
 
@@ -171,8 +172,27 @@ For 32-bit we have the following conventions - kernel is built with
 	andq	$(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
 .endm
 
-.macro COALESCE_TLBI
+.macro COALESCE_TLBI scratch_reg:req
 #ifdef CONFIG_COALESCE_TLBI
+	/* No point in doing this for housekeeping CPUs */
+	movslq	PER_CPU_VAR(cpu_number), \scratch_reg
+	bt	\scratch_reg, tick_nohz_full_mask(%rip)
+	jnc	.Lend_tlbi_\@
+
+	ALTERNATIVE "jmp .Lcr4_\@", "", X86_FEATURE_INVPCID
+	movq	$(INVPCID_TYPE_ALL_INCL_GLOBAL), \scratch_reg
+	/* descriptor is all zeroes, point at the zero page */
+	invpcid	empty_zero_page(%rip), \scratch_reg
+	jmp	.Lend_tlbi_\@
+.Lcr4_\@:
+	/* Note: this gives CR4 pinning the finger */
+	movq	PER_CPU_VAR(cpu_tlbstate + TLB_STATE_cr4), \scratch_reg
+	xorq	$(X86_CR4_PGE), \scratch_reg
+	movq	\scratch_reg, %cr4
+	xorq	$(X86_CR4_PGE), \scratch_reg
+	movq	\scratch_reg, %cr4
+
+.Lend_tlbi_\@:
 	movl	$1, PER_CPU_VAR(kernel_cr3_loaded)
 #endif // CONFIG_COALESCE_TLBI
 .endm
@@ -188,7 +208,7 @@ For 32-bit we have the following conventions - kernel is built with
 	mov	%cr3, \scratch_reg
 	ADJUST_KERNEL_CR3 \scratch_reg
 	mov	\scratch_reg, %cr3
-	COALESCE_TLBI
+	COALESCE_TLBI \scratch_reg
 .Lend_\@:
 .endm
 
@@ -256,7 +276,7 @@ For 32-bit we have the following conventions - kernel is built with
 
 	ADJUST_KERNEL_CR3 \scratch_reg
 	movq	\scratch_reg, %cr3
-	COALESCE_TLBI
+	COALESCE_TLBI \scratch_reg
 
 .Ldone_\@:
 .endm
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 6259b474073bc..f5abdcbb150d9 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -105,6 +105,7 @@ static void __used common(void)
 
 	/* TLB state for the entry code */
 	OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
+	OFFSET(TLB_STATE_cr4, tlb_state, cr4);
 
 	/* Layout info for cpu_entry_area */
 	OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
-- 
2.51.0
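
For readers less familiar with the entry asm, here is a rough, illustrative
C sketch of what the COALESCE_TLBI macro above does on kernel entry. It is
not part of the patch: the real work must happen in assembly before any C
code runs, the helper name coalesce_tlbi_on_entry() is made up, and the
exact type of the per-CPU kernel_cr3_loaded flag (introduced earlier in
this series) is assumed. Unlike the asm, which pokes %cr4 directly and so
sidesteps CR4 pinning, the sketch goes through native_write_cr4().

/* Illustrative sketch only -- not part of this patch. */
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/tick.h>

#include <asm/cpufeature.h>
#include <asm/invpcid.h>
#include <asm/processor-flags.h>
#include <asm/special_insns.h>
#include <asm/tlbflush.h>

/* Per-CPU flag from this series; type assumed here for the sketch. */
DECLARE_PER_CPU(unsigned int, kernel_cr3_loaded);

/* Hypothetical C rendering of the COALESCE_TLBI asm macro. */
static __always_inline void coalesce_tlbi_on_entry(void)
{
	/* No point in doing this for housekeeping CPUs */
	if (tick_nohz_full_cpu(raw_smp_processor_id())) {
		if (cpu_feature_enabled(X86_FEATURE_INVPCID)) {
			/* One INVPCID flushes everything, globals included */
			invpcid_flush_all();
		} else {
			/* Toggle CR4.PGE to flush global translations */
			unsigned long cr4 = this_cpu_read(cpu_tlbstate.cr4);

			native_write_cr4(cr4 ^ X86_CR4_PGE);
			native_write_cr4(cr4);
		}
	}

	/* Record that the kernel CR3 (and a clean TLB) is now in place */
	this_cpu_write(kernel_cr3_loaded, 1);
}

The flush itself mirrors what native_flush_tlb_global() already does in C
(INVPCID when available, otherwise a CR4.PGE toggle); the point of the asm
macro is to run it at the CR3 switch, before any deferred-flush target can
be touched.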