The .so files for the vdso are named vdso*.so, and these structures are images of the corresponding vdso. Naming them accordingly is more consistent, very slightly more compact (by one character...), and simplifies the Makefile just a little bit. Signed-off-by: H. Peter Anvin (Intel) --- arch/x86/entry/syscall_32.c | 2 +- arch/x86/entry/vdso/Makefile | 8 ++++---- arch/x86/entry/vdso/vma.c | 10 +++++----- arch/x86/include/asm/elf.h | 2 +- arch/x86/include/asm/vdso.h | 6 +++--- arch/x86/kernel/process_64.c | 6 +++--- arch/x86/kernel/signal_32.c | 4 ++-- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 2b15ea17bb7c..eff33a4e0adc 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -318,7 +318,7 @@ __visible noinstr bool do_fast_syscall_32(struct pt_regs *regs) * convention. Adjust regs so it looks like we entered using int80. */ unsigned long landing_pad = (unsigned long)current->mm->context.vdso + - vdso_image_32.sym_int80_landing_pad; + vdso32_image.sym_int80_landing_pad; /* * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index f247f5f5cb44..7f833026d5b2 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -16,9 +16,9 @@ vobjs-$(CONFIG_X86_SGX) += vsgx.o obj-y += vma.o extable.o # vDSO images to build: -obj-$(CONFIG_X86_64) += vdso-image-64.o -obj-$(CONFIG_X86_X32_ABI) += vdso-image-x32.o -obj-$(CONFIG_COMPAT_32) += vdso-image-32.o vdso32-setup.o +obj-$(CONFIG_X86_64) += vdso64-image.o +obj-$(CONFIG_X86_X32_ABI) += vdsox32-image.o +obj-$(CONFIG_COMPAT_32) += vdso32-image.o vdso32-setup.o vobjs := $(addprefix $(obj)/, $(vobjs-y)) vobjs32 := $(addprefix $(obj)/, $(vobjs32-y)) @@ -44,7 +44,7 @@ hostprogs += vdso2c quiet_cmd_vdso2c = VDSO2C $@ cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@ -$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE +$(obj)/vdso%-image.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE $(call if_changed,vdso2c) # diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index afe105b2f907..8f98c2d7c7a9 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -65,7 +65,7 @@ static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, static void vdso_fix_landing(const struct vdso_image *image, struct vm_area_struct *new_vma) { - if (in_ia32_syscall() && image == &vdso_image_32) { + if (in_ia32_syscall() && image == &vdso32_image) { struct pt_regs *regs = current_pt_regs(); unsigned long vdso_land = image->sym_int80_landing_pad; unsigned long old_land_addr = vdso_land + @@ -230,7 +230,7 @@ static int load_vdso32(void) if (vdso32_enabled != 1) /* Other values all mean "disabled" */ return 0; - return map_vdso(&vdso_image_32, 0); + return map_vdso(&vdso32_image, 0); } int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) @@ -239,7 +239,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (!vdso64_enabled) return 0; - return map_vdso(&vdso_image_64, 0); + return map_vdso(&vdso64_image, 0); } return load_vdso32(); @@ -252,7 +252,7 @@ int compat_arch_setup_additional_pages(struct linux_binprm *bprm, if (IS_ENABLED(CONFIG_X86_X32_ABI) && x32) { if (!vdso64_enabled) return 0; - return map_vdso(&vdso_image_x32, 0); + return map_vdso(&vdsox32_image, 0); } if (IS_ENABLED(CONFIG_IA32_EMULATION)) @@ -267,7 +267,7 @@ bool
arch_syscall_is_vdso_sigreturn(struct pt_regs *regs) const struct vdso_image *image = current->mm->context.vdso_image; unsigned long vdso = (unsigned long) current->mm->context.vdso; - if (in_ia32_syscall() && image == &vdso_image_32) { + if (in_ia32_syscall() && image == &vdso32_image) { if (regs->ip == vdso + image->sym_vdso32_sigreturn_landing_pad || regs->ip == vdso + image->sym_vdso32_rt_sigreturn_landing_pad) return true; diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 6c8fdc96be7e..2ba5f166e58f 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -361,7 +361,7 @@ else if (IS_ENABLED(CONFIG_IA32_EMULATION)) \ #define VDSO_ENTRY \ ((unsigned long)current->mm->context.vdso + \ - vdso_image_32.sym___kernel_vsyscall) + vdso32_image.sym___kernel_vsyscall) struct linux_binprm; diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index b7253ef3205a..e8afbe9faa5b 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -27,9 +27,9 @@ struct vdso_image { long sym_vdso32_rt_sigreturn_landing_pad; }; -extern const struct vdso_image vdso_image_64; -extern const struct vdso_image vdso_image_x32; -extern const struct vdso_image vdso_image_32; +extern const struct vdso_image vdso64_image; +extern const struct vdso_image vdsox32_image; +extern const struct vdso_image vdso32_image; extern int __init init_vdso_image(const struct vdso_image *image); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 52a5c03c353c..ae00c788962a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -942,14 +942,14 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) #ifdef CONFIG_CHECKPOINT_RESTORE # ifdef CONFIG_X86_X32_ABI case ARCH_MAP_VDSO_X32: - return prctl_map_vdso(&vdso_image_x32, arg2); + return prctl_map_vdso(&vdsox32_image, arg2); # endif # ifdef CONFIG_IA32_EMULATION case ARCH_MAP_VDSO_32: - return prctl_map_vdso(&vdso_image_32, arg2); + return prctl_map_vdso(&vdso32_image, arg2); # endif case ARCH_MAP_VDSO_64: - return prctl_map_vdso(&vdso_image_64, arg2); + return prctl_map_vdso(&vdso64_image, arg2); #endif #ifdef CONFIG_ADDRESS_MASKING case ARCH_GET_UNTAG_MASK: diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 42bbc42bd350..e55cf19e68fe 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -282,7 +282,7 @@ int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs) /* Return stub is in 32bit vsyscall page */ if (current->mm->context.vdso) restorer = current->mm->context.vdso + - vdso_image_32.sym___kernel_sigreturn; + vdso32_image.sym___kernel_sigreturn; else restorer = &frame->retcode; } @@ -368,7 +368,7 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) restorer = ksig->ka.sa.sa_restorer; else restorer = current->mm->context.vdso + - vdso_image_32.sym___kernel_rt_sigreturn; + vdso32_image.sym___kernel_rt_sigreturn; unsafe_put_user(ptr_to_compat(restorer), &frame->pretcode, Efault); /* -- 2.51.1 - Separate out the vdso sources into common, vdso32, and vdso64 directories. - Move the vdso2c tool to arch/x86/tools. - Build the 32- and 64-bit vdsos in their respective subdirectories; this greatly simplifies the build flags handling. - Unify the mangling of Makefile flags between the 32- and 64-bit vdso code as much as possible; all common rules are put in arch/x86/entry/vdso/common/Makefile.include. 
The remainder is very simple for 32 bits; the 64-bit one is only slightly more complicated because it contains the x32 generation rule. - Define __DISABLE_EXPORTS when building the vdso. This need seems to have been masked by a different ordering of compile flags before. - Change CONFIG_X86_64 to BUILD_VDSO32_64 in vdso32/system_call.S, to make it compatible with including fake_32bit_build.h. - The -fcf-protection= option was "leaking" from the kernel build, for reasons that were not clear to me. Furthermore, several distributions ship with it set to a default value other than "-fcf-protection=none". Make it match the configuration options for *user space*. Note that this patch may seem large, but the vast majority of it is simply code movement. Signed-off-by: H. Peter Anvin (Intel) --- arch/x86/Makefile | 2 +- arch/x86/entry/vdso/.gitignore | 11 +- arch/x86/entry/vdso/Makefile | 162 +----------------- arch/x86/entry/vdso/common/Makefile.include | 89 ++++++++++ .../entry/vdso/{vdso-note.S => common/note.S} | 5 +- .../entry/vdso/{ => common}/vclock_gettime.c | 0 .../entry/vdso/{ => common}/vdso-layout.lds.S | 0 arch/x86/entry/vdso/{ => common}/vgetcpu.c | 0 arch/x86/entry/vdso/vdso32/Makefile | 24 +++ arch/x86/entry/vdso/vdso32/note.S | 19 +- arch/x86/entry/vdso/vdso32/system_call.S | 2 +- arch/x86/entry/vdso/vdso32/vclock_gettime.c | 5 +- arch/x86/entry/vdso/vdso32/vdso32.lds.S | 2 +- arch/x86/entry/vdso/vdso32/vgetcpu.c | 4 +- arch/x86/entry/vdso/vdso64/Makefile | 45 +++++ arch/x86/entry/vdso/vdso64/note.S | 1 + arch/x86/entry/vdso/vdso64/vclock_gettime.c | 1 + .../vdso/{vdso.lds.S => vdso64/vdso64.lds.S} | 2 +- .../x86/entry/vdso/{ => vdso64}/vdsox32.lds.S | 2 +- arch/x86/entry/vdso/vdso64/vgetcpu.c | 1 + .../vdso/{ => vdso64}/vgetrandom-chacha.S | 0 arch/x86/entry/vdso/{ => vdso64}/vgetrandom.c | 2 +- arch/x86/entry/vdso/{ => vdso64}/vsgx.S | 0 arch/x86/tools/Makefile | 15 +- arch/x86/{entry/vdso => tools}/vdso2c.c | 0 arch/x86/{entry/vdso => tools}/vdso2c.h | 0 26 files changed, 194 insertions(+), 200 deletions(-) create mode 100644 arch/x86/entry/vdso/common/Makefile.include rename arch/x86/entry/vdso/{vdso-note.S => common/note.S} (62%) rename arch/x86/entry/vdso/{ => common}/vclock_gettime.c (100%) rename arch/x86/entry/vdso/{ => common}/vdso-layout.lds.S (100%) rename arch/x86/entry/vdso/{ => common}/vgetcpu.c (100%) create mode 100644 arch/x86/entry/vdso/vdso32/Makefile create mode 100644 arch/x86/entry/vdso/vdso64/Makefile create mode 100644 arch/x86/entry/vdso/vdso64/note.S create mode 100644 arch/x86/entry/vdso/vdso64/vclock_gettime.c rename arch/x86/entry/vdso/{vdso.lds.S => vdso64/vdso64.lds.S} (94%) rename arch/x86/entry/vdso/{ => vdso64}/vdsox32.lds.S (92%) create mode 100644 arch/x86/entry/vdso/vdso64/vgetcpu.c rename arch/x86/entry/vdso/{ => vdso64}/vgetrandom-chacha.S (100%) rename arch/x86/entry/vdso/{ => vdso64}/vgetrandom.c (91%) rename arch/x86/entry/vdso/{ => vdso64}/vsgx.S (100%) rename arch/x86/{entry/vdso => tools}/vdso2c.c (100%) rename arch/x86/{entry/vdso => tools}/vdso2c.h (100%) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1a27efcf3c20..b80947b14582 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -250,7 +250,7 @@ endif archscripts: scripts_basic - $(Q)$(MAKE) $(build)=arch/x86/tools relocs + $(Q)$(MAKE) $(build)=arch/x86/tools relocs vdso2c ### # Syscall table generation diff --git a/arch/x86/entry/vdso/.gitignore b/arch/x86/entry/vdso/.gitignore index 37a6129d597b..eb60859dbcbf 100644 --- a/arch/x86/entry/vdso/.gitignore +++
b/arch/x86/entry/vdso/.gitignore @@ -1,8 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -vdso.lds -vdsox32.lds -vdso32-syscall-syms.lds -vdso32-sysenter-syms.lds -vdso32-int80-syms.lds -vdso-image-*.c -vdso2c +*.lds +*.so +*.so.dbg +vdso*-image.c diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 7f833026d5b2..987b43fd4cd3 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -3,160 +3,10 @@ # Building vDSO images for x86. # -# Include the generic Makefile to check the built vDSO: -include $(srctree)/lib/vdso/Makefile.include +# Regular kernel objects +obj-y := vma.o extable.o +obj-$(CONFIG_COMPAT_32) += vdso32-setup.o -# Files to link into the vDSO: -vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vgetrandom.o vgetrandom-chacha.o -vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o -vobjs32-y += vdso32/vclock_gettime.o vdso32/vgetcpu.o -vobjs-$(CONFIG_X86_SGX) += vsgx.o - -# Files to link into the kernel: -obj-y += vma.o extable.o - -# vDSO images to build: -obj-$(CONFIG_X86_64) += vdso64-image.o -obj-$(CONFIG_X86_X32_ABI) += vdsox32-image.o -obj-$(CONFIG_COMPAT_32) += vdso32-image.o vdso32-setup.o - -vobjs := $(addprefix $(obj)/, $(vobjs-y)) -vobjs32 := $(addprefix $(obj)/, $(vobjs32-y)) - -$(obj)/vdso.o: $(obj)/vdso.so - -targets += vdso.lds $(vobjs-y) -targets += vdso32/vdso32.lds $(vobjs32-y) - -targets += $(foreach x, 64 x32 32, vdso-image-$(x).c vdso$(x).so vdso$(x).so.dbg) - -CPPFLAGS_vdso.lds += -P -C - -VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 \ - -z max-page-size=4096 - -$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE - $(call if_changed,vdso_and_check) - -HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi -hostprogs += vdso2c - -quiet_cmd_vdso2c = VDSO2C $@ - cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@ - -$(obj)/vdso%-image.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE - $(call if_changed,vdso2c) - -# -# Don't omit frame pointers for ease of userspace debugging, but do -# optimize sibling calls. -# -CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ - $(filter -g%,$(KBUILD_CFLAGS)) -fno-stack-protector \ - -fno-omit-frame-pointer -foptimize-sibling-calls \ - -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO - -ifdef CONFIG_MITIGATION_RETPOLINE -ifneq ($(RETPOLINE_VDSO_CFLAGS),) - CFL += $(RETPOLINE_VDSO_CFLAGS) -endif -endif - -$(vobjs): KBUILD_CFLAGS := $(filter-out $(PADDING_CFLAGS) $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(KSTACK_ERASE_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) -$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO - -# -# vDSO code runs in userspace and -pg doesn't help with profiling anyway. -# -CFLAGS_REMOVE_vclock_gettime.o = -pg -CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg -CFLAGS_REMOVE_vgetcpu.o = -pg -CFLAGS_REMOVE_vdso32/vgetcpu.o = -pg -CFLAGS_REMOVE_vsgx.o = -pg -CFLAGS_REMOVE_vgetrandom.o = -pg - -# -# X32 processes use x32 vDSO to access 64bit kernel data. -# -# Build x32 vDSO image: -# 1. Compile x32 vDSO as 64bit. -# 2. Convert object files to x32. -# 3. Build x32 VDSO image with x32 objects, which contains 64bit codes -# so that it can reach 64bit address space with 64bit pointers. 
-# - -CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds) -VDSO_LDFLAGS_vdsox32.lds = -m elf32_x86_64 -soname linux-vdso.so.1 \ - -z max-page-size=4096 - -# x32-rebranded versions -vobjx32s-y := $(vobjs-y:.o=-x32.o) - -# same thing, but in the output directory -vobjx32s := $(addprefix $(obj)/, $(vobjx32s-y)) - -# Convert 64bit object file to x32 for x32 vDSO. -quiet_cmd_x32 = X32 $@ - cmd_x32 = $(OBJCOPY) -O elf32-x86-64 $< $@ - -$(obj)/%-x32.o: $(obj)/%.o FORCE - $(call if_changed,x32) - -targets += vdsox32.lds $(vobjx32s-y) - -$(obj)/%.so: OBJCOPYFLAGS := -S --remove-section __ex_table -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) - -$(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE - $(call if_changed,vdso_and_check) - -CPPFLAGS_vdso32/vdso32.lds = $(CPPFLAGS_vdso.lds) -VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1 - -KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO -$(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) -$(obj)/vdso32.so.dbg: asflags-$(CONFIG_X86_64) += -m32 - -KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) -KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(RANDSTRUCT_CFLAGS),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(KSTACK_ERASE_CFLAGS),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_CFI),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(PADDING_CFLAGS),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic -KBUILD_CFLAGS_32 += -fno-stack-protector -KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) -KBUILD_CFLAGS_32 += -fno-omit-frame-pointer -KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS_32 += -DBUILD_VDSO - -ifdef CONFIG_MITIGATION_RETPOLINE -ifneq ($(RETPOLINE_VDSO_CFLAGS),) - KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) -endif -endif - -$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) - -$(obj)/vdso32.so.dbg: $(obj)/vdso32/vdso32.lds $(vobjs32) FORCE - $(call if_changed,vdso_and_check) - -# -# The DSO images are built using a special linker script. -# -quiet_cmd_vdso = VDSO $@ - cmd_vdso = $(LD) -o $@ \ - $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ - -T $(filter %.lds,$^) $(filter %.o,$^) - -VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 --no-undefined \ - $(call ld-option, --eh-frame-hdr) -Bsymbolic -z noexecstack - -quiet_cmd_vdso_and_check = VDSO $@ - cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check) +# vDSO directories +obj-$(CONFIG_X86_64) += vdso64/ +obj-$(CONFIG_COMPAT_32) += vdso32/ diff --git a/arch/x86/entry/vdso/common/Makefile.include b/arch/x86/entry/vdso/common/Makefile.include new file mode 100644 index 000000000000..3514b4a6869b --- /dev/null +++ b/arch/x86/entry/vdso/common/Makefile.include @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Building vDSO images for x86. 
+# + +# Include the generic Makefile to check the built vDSO: +include $(srctree)/lib/vdso/Makefile.include + +obj-y += $(foreach x,$(vdsos-y),vdso$(x)-image.o) + +targets += $(foreach x,$(vdsos-y),vdso$(x)-image.c vdso$(x).so vdso$(x).so.dbg vdso$(x).lds) +targets += $(vobjs-y) + +# vobjs-y with $(obj)/ prepended +vobjs := $(addprefix $(obj)/,$(vobjs-y)) + +# Options for vdso*.lds +CPPFLAGS_VDSO_LDS := -P -C -I$(src)/.. +$(obj)/%.lds : KBUILD_CPPFLAGS += $(CPPFLAGS_VDSO_LDS) + +# +# Options from KBUILD_[AC]FLAGS that should *NOT* be kept +# +flags-remove-y += \ + -D__KERNEL__ -mcmodel=kernel -mregparm=3 \ + -fno-pic -fno-PIC -fno-pie -fno-PIE \ + -mfentry -pg \ + $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(KSTACK_ERASE_CFLAGS) \ + $(RETPOLINE_CFLAGS) $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ + $(PADDING_CFLAGS) + +# +# Don't omit frame pointers for ease of userspace debugging, but do +# optimize sibling calls. +# +flags-y += -D__DISABLE_EXPORTS +flags-y += -DDISABLE_BRANCH_PROFILING +flags-y += -DBUILD_VDSO +flags-y += -I$(src)/.. -I$(srctree) +flags-y += -O2 -fpic +flags-y += -fno-stack-protector +flags-y += -fno-omit-frame-pointer +flags-y += -foptimize-sibling-calls +flags-y += -fasynchronous-unwind-tables + +# Reset cf protections enabled by compiler default +flags-y += $(call cc-option, -fcf-protection=none) +flags-$(CONFIG_X86_USER_SHADOW_STACK) += $(call cc-option, -fcf-protection=return) +# When user space IBT is supported, enable this. +# flags-$(CONFIG_USER_IBT) += $(call cc-option, -fcf-protection=branch) + +flags-$(CONFIG_MITIGATION_RETPOLINE) += $(RETPOLINE_VDSO_CFLAGS) + +# These need to be conditional on $(vobjs) as they do not apply to +# the output vdso*-image.o files which are standard kernel objects. +$(vobjs) : KBUILD_AFLAGS := \ + $(filter-out $(flags-remove-y),$(KBUILD_AFLAGS)) $(flags-y) +$(vobjs) : KBUILD_CFLAGS := \ + $(filter-out $(flags-remove-y),$(KBUILD_CFLAGS)) $(flags-y) + +# +# The VDSO images are built using a special linker script. +# +VDSO_LDFLAGS := -shared --hash-style=both --build-id=sha1 --no-undefined \ + $(call ld-option, --eh-frame-hdr) -Bsymbolic -z noexecstack + +quiet_cmd_vdso = VDSO $@ + cmd_vdso = $(LD) -o $@ \ + $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$*) \ + -T $(filter %.lds,$^) $(filter %.o,$^) +quiet_cmd_vdso_and_check = VDSO $@ + cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check) + +$(obj)/vdso%.so.dbg: $(obj)/vdso%.lds FORCE + $(call if_changed,vdso_and_check) + +$(obj)/%.so: OBJCOPYFLAGS := -S --remove-section __ex_table +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +VDSO2C = $(objtree)/arch/x86/tools/vdso2c + +quiet_cmd_vdso2c = VDSO2C $@ + cmd_vdso2c = $(VDSO2C) $< $(<:%.dbg=%) $@ + +$(obj)/%-image.c: $(obj)/%.so.dbg $(obj)/%.so $(VDSO2C) FORCE + $(call if_changed,vdso2c) + +$(obj)/%-image.o: $(obj)/%-image.c diff --git a/arch/x86/entry/vdso/vdso-note.S b/arch/x86/entry/vdso/common/note.S similarity index 62% rename from arch/x86/entry/vdso/vdso-note.S rename to arch/x86/entry/vdso/common/note.S index 79423170118f..2cbd39939dc6 100644 --- a/arch/x86/entry/vdso/vdso-note.S +++ b/arch/x86/entry/vdso/common/note.S @@ -1,13 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. * Here we can supply some information useful to userland. */ #include -#include #include #include +/* Ideally this would use UTS_NAME, but using a quoted string here + doesn't work. Remember to change this when changing the + kernel's name.
*/ ELFNOTE_START(Linux, 0, "a") .long LINUX_VERSION_CODE ELFNOTE_END diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/common/vclock_gettime.c similarity index 100% rename from arch/x86/entry/vdso/vclock_gettime.c rename to arch/x86/entry/vdso/common/vclock_gettime.c diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/common/vdso-layout.lds.S similarity index 100% rename from arch/x86/entry/vdso/vdso-layout.lds.S rename to arch/x86/entry/vdso/common/vdso-layout.lds.S diff --git a/arch/x86/entry/vdso/vgetcpu.c b/arch/x86/entry/vdso/common/vgetcpu.c similarity index 100% rename from arch/x86/entry/vdso/vgetcpu.c rename to arch/x86/entry/vdso/common/vgetcpu.c diff --git a/arch/x86/entry/vdso/vdso32/Makefile b/arch/x86/entry/vdso/vdso32/Makefile new file mode 100644 index 000000000000..add6afb484ba --- /dev/null +++ b/arch/x86/entry/vdso/vdso32/Makefile @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# 32-bit vDSO images for x86. +# + +# The vDSOs built in this directory +vdsos-y := 32 + +# Files to link into the vDSO: +vobjs-y := note.o vclock_gettime.o vgetcpu.o +vobjs-y += system_call.o sigreturn.o + +# Compilation flags +flags-y := -DBUILD_VDSO32 -m32 -mregparm=0 +flags-$(CONFIG_X86_64) += -include $(src)/fake_32bit_build.h +flags-remove-y := -m64 + +# The location of this include matters! +include $(src)/../common/Makefile.include + +# Linker options for the vdso +VDSO_LDFLAGS_32 := -m elf_i386 -soname linux-gate.so.1 + +$(obj)/vdso32.so.dbg: $(vobjs) diff --git a/arch/x86/entry/vdso/vdso32/note.S b/arch/x86/entry/vdso/vdso32/note.S index 2cbd39939dc6..62d8aa51ce99 100644 --- a/arch/x86/entry/vdso/vdso32/note.S +++ b/arch/x86/entry/vdso/vdso32/note.S @@ -1,18 +1 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. - * Here we can supply some information useful to userland. - */ - -#include -#include -#include - -/* Ideally this would use UTS_NAME, but using a quoted string here - doesn't work. Remember to change this when changing the - kernel's name. */ -ELFNOTE_START(Linux, 0, "a") - .long LINUX_VERSION_CODE -ELFNOTE_END - -BUILD_SALT +#include "common/note.S" diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index d33c6513fd2c..2a15634bbe75 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -52,7 +52,7 @@ __kernel_vsyscall: #define SYSENTER_SEQUENCE "movl %esp, %ebp; sysenter" #define SYSCALL_SEQUENCE "movl %ecx, %ebp; syscall" -#ifdef CONFIG_X86_64 +#ifdef BUILD_VDSO32_64 /* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. 
*/ ALTERNATIVE_2 "", SYSENTER_SEQUENCE, X86_FEATURE_SYSENTER32, \ SYSCALL_SEQUENCE, X86_FEATURE_SYSCALL32 diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c index 86981decfea8..1481f0021b9f 100644 --- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c +++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c @@ -1,4 +1 @@ -// SPDX-License-Identifier: GPL-2.0 -#define BUILD_VDSO32 -#include "fake_32bit_build.h" -#include "../vclock_gettime.c" +#include "common/vclock_gettime.c" diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S index 8a3be07006bb..8a853543fc0d 100644 --- a/arch/x86/entry/vdso/vdso32/vdso32.lds.S +++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S @@ -11,7 +11,7 @@ #define BUILD_VDSO32 -#include "../vdso-layout.lds.S" +#include "common/vdso-layout.lds.S" /* The ELF entry point can be used to set the AT_SYSINFO value. */ ENTRY(__kernel_vsyscall); diff --git a/arch/x86/entry/vdso/vdso32/vgetcpu.c b/arch/x86/entry/vdso/vdso32/vgetcpu.c index 3a9791f5e998..00cc8325a020 100644 --- a/arch/x86/entry/vdso/vdso32/vgetcpu.c +++ b/arch/x86/entry/vdso/vdso32/vgetcpu.c @@ -1,3 +1 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "fake_32bit_build.h" -#include "../vgetcpu.c" +#include "common/vgetcpu.c" diff --git a/arch/x86/entry/vdso/vdso64/Makefile b/arch/x86/entry/vdso/vdso64/Makefile new file mode 100644 index 000000000000..245996f34eae --- /dev/null +++ b/arch/x86/entry/vdso/vdso64/Makefile @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# 64-bit vDSO images for x86. +# + +# The vDSOs built in this directory +vdsos-y := 64 +vdsos-$(CONFIG_X86_X32_ABI) += x32 + +# Files to link into the vDSO: +vobjs-y := note.o vclock_gettime.o vgetcpu.o +vobjs-y += vgetrandom.o vgetrandom-chacha.o +vobjs-$(CONFIG_X86_SGX) += vsgx.o + +# Compilation flags +flags-y := -DBUILD_VDSO64 -m64 -mcmodel=small + +# The location of this include matters! +include $(src)/../common/Makefile.include + +# +# X32 processes use x32 vDSO to access 64bit kernel data. +# +# Build x32 vDSO image: +# 1. Compile x32 vDSO as 64bit. +# 2. Convert object files to x32. +# 3. Build x32 VDSO image with x32 objects, which contains 64bit codes +# so that it can reach 64bit address space with 64bit pointers. +# + +# Convert 64bit object file to x32 for x32 vDSO. 
+quiet_cmd_x32 = X32 $@ + cmd_x32 = $(OBJCOPY) -O elf32-x86-64 $< $@ + +$(obj)/%-x32.o: $(obj)/%.o FORCE + $(call if_changed,x32) + +vobjsx32 = $(patsubst %.o,%-x32.o,$(vobjs)) + +# Linker options for the vdso +VDSO_LDFLAGS_64 := -m elf_x86_64 -soname linux-vdso.so.1 -z max-page-size=4096 +VDSO_LDFLAGS_x32 := $(subst elf_x86_64,elf32_x86_64,$(VDSO_LDFLAGS_64)) + +$(obj)/vdso64.so.dbg: $(vobjs) +$(obj)/vdsox32.so.dbg: $(vobjsx32) diff --git a/arch/x86/entry/vdso/vdso64/note.S b/arch/x86/entry/vdso/vdso64/note.S new file mode 100644 index 000000000000..62d8aa51ce99 --- /dev/null +++ b/arch/x86/entry/vdso/vdso64/note.S @@ -0,0 +1 @@ +#include "common/note.S" diff --git a/arch/x86/entry/vdso/vdso64/vclock_gettime.c b/arch/x86/entry/vdso/vdso64/vclock_gettime.c new file mode 100644 index 000000000000..1481f0021b9f --- /dev/null +++ b/arch/x86/entry/vdso/vdso64/vclock_gettime.c @@ -0,0 +1 @@ +#include "common/vclock_gettime.c" diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso64/vdso64.lds.S similarity index 94% rename from arch/x86/entry/vdso/vdso.lds.S rename to arch/x86/entry/vdso/vdso64/vdso64.lds.S index 0bab5f4af6d1..5ce3f2b6373a 100644 --- a/arch/x86/entry/vdso/vdso.lds.S +++ b/arch/x86/entry/vdso/vdso64/vdso64.lds.S @@ -9,7 +9,7 @@ #define BUILD_VDSO64 -#include "vdso-layout.lds.S" +#include "common/vdso-layout.lds.S" /* * This controls what userland symbols we export from the vDSO. diff --git a/arch/x86/entry/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdso64/vdsox32.lds.S similarity index 92% rename from arch/x86/entry/vdso/vdsox32.lds.S rename to arch/x86/entry/vdso/vdso64/vdsox32.lds.S index 16a8050a4fb6..3dbd20c8dacc 100644 --- a/arch/x86/entry/vdso/vdsox32.lds.S +++ b/arch/x86/entry/vdso/vdso64/vdsox32.lds.S @@ -9,7 +9,7 @@ #define BUILD_VDSOX32 -#include "vdso-layout.lds.S" +#include "common/vdso-layout.lds.S" /* * This controls what userland symbols we export from the vDSO. 
diff --git a/arch/x86/entry/vdso/vdso64/vgetcpu.c b/arch/x86/entry/vdso/vdso64/vgetcpu.c new file mode 100644 index 000000000000..00cc8325a020 --- /dev/null +++ b/arch/x86/entry/vdso/vdso64/vgetcpu.c @@ -0,0 +1 @@ +#include "common/vgetcpu.c" diff --git a/arch/x86/entry/vdso/vgetrandom-chacha.S b/arch/x86/entry/vdso/vdso64/vgetrandom-chacha.S similarity index 100% rename from arch/x86/entry/vdso/vgetrandom-chacha.S rename to arch/x86/entry/vdso/vdso64/vgetrandom-chacha.S diff --git a/arch/x86/entry/vdso/vgetrandom.c b/arch/x86/entry/vdso/vdso64/vgetrandom.c similarity index 91% rename from arch/x86/entry/vdso/vgetrandom.c rename to arch/x86/entry/vdso/vdso64/vgetrandom.c index 430862b8977c..6a95d36b12d9 100644 --- a/arch/x86/entry/vdso/vgetrandom.c +++ b/arch/x86/entry/vdso/vdso64/vgetrandom.c @@ -4,7 +4,7 @@ */ #include -#include "../../../../lib/vdso/getrandom.c" +#include "lib/vdso/getrandom.c" ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) { diff --git a/arch/x86/entry/vdso/vsgx.S b/arch/x86/entry/vdso/vdso64/vsgx.S similarity index 100% rename from arch/x86/entry/vdso/vsgx.S rename to arch/x86/entry/vdso/vdso64/vsgx.S diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index 7278e2545c35..39a183fffd04 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -38,9 +38,14 @@ $(obj)/insn_decoder_test.o: $(srctree)/tools/arch/x86/lib/insn.c $(srctree)/tool $(obj)/insn_sanity.o: $(srctree)/tools/arch/x86/lib/insn.c $(srctree)/tools/arch/x86/lib/inat.c $(srctree)/tools/arch/x86/include/asm/inat_types.h $(srctree)/tools/arch/x86/include/asm/inat.h $(srctree)/tools/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c -HOST_EXTRACFLAGS += -I$(srctree)/tools/include -hostprogs += relocs -relocs-objs := relocs_32.o relocs_64.o relocs_common.o -PHONY += relocs -relocs: $(obj)/relocs +HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi \ -I$(srctree)/arch/$(SUBARCH)/include/uapi + +hostprogs += relocs vdso2c +relocs-objs := relocs_32.o relocs_64.o relocs_common.o + +always-y := $(hostprogs) + +PHONY += $(hostprogs) +$(hostprogs): %: $(obj)/% @: diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/tools/vdso2c.c similarity index 100% rename from arch/x86/entry/vdso/vdso2c.c rename to arch/x86/tools/vdso2c.c diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/tools/vdso2c.h similarity index 100% rename from arch/x86/entry/vdso/vdso2c.h rename to arch/x86/tools/vdso2c.h -- 2.51.1 There is no fundamental reason to use the int80_landing_pad symbol to adjust ip when moving the vdso. If ip falls within the vdso, and the vdso is moved, we should change the ip accordingly, regardless of mode or location within the vdso. This *currently* can only happen on 32 bits, but there isn't any reason not to do so generically. Note that if this is ever possible from a vdso-internal call, then the user space stack will also need to be adjusted (as well as the shadow stack, if enabled). Fortunately this is not currently the case. At the moment, we don't even consider other threads when moving the vdso. The assumption is that it is only used by process freeze/thaw for migration, where this is not an issue. Signed-off-by: H. Peter Anvin (Intel) --- arch/x86/entry/vdso/vma.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 8f98c2d7c7a9..e7fd7517370f 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -65,16 +65,12 @@ static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, static void vdso_fix_landing(const struct vdso_image *image, struct vm_area_struct *new_vma) { - if (in_ia32_syscall() && image == &vdso32_image) { - struct pt_regs *regs = current_pt_regs(); - unsigned long vdso_land = image->sym_int80_landing_pad; - unsigned long old_land_addr = vdso_land + - (unsigned long)current->mm->context.vdso; - - /* Fixing userspace landing - look at do_fast_syscall_32 */ - if (regs->ip == old_land_addr) - regs->ip = new_vma->vm_start + vdso_land; - } + struct pt_regs *regs = current_pt_regs(); + unsigned long ipoffset = regs->ip - + (unsigned long)current->mm->context.vdso; + + if (ipoffset < image->size) + regs->ip = new_vma->vm_start + ipoffset; } static int vdso_mremap(const struct vm_special_mapping *sm, -- 2.51.1
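The single comparison in the new vdso_fix_landing() is worth a note: ipoffset is unsigned, so an ip below the old vdso base wraps around to a huge value and fails the one range check, and no separate lower-bound test is needed. A minimal user-space sketch of the idiom (illustration only, not kernel code; the addresses are made up):

#include <stdio.h>

/* Relocate ip only if it pointed into the old vdso mapping. */
static unsigned long fix_ip(unsigned long ip, unsigned long old_base,
			    unsigned long new_base, unsigned long size)
{
	unsigned long ipoffset = ip - old_base;	/* may wrap; that is the point */

	return (ipoffset < size) ? new_base + ipoffset : ip;
}

int main(void)
{
	unsigned long old_base = 0x7fff0000UL, new_base = 0x50000000UL;
	unsigned long size = 0x2000;

	/* ip inside the old vdso: relocated to the new mapping */
	printf("%#lx\n", fix_ip(old_base + 0x123, old_base, new_base, size));
	/* ip below the old vdso: left alone thanks to unsigned wraparound */
	printf("%#lx\n", fix_ip(old_base - 4, old_base, new_base, size));
	return 0;
}

This is why the rewrite can drop the in_ia32_syscall() and landing-pad special cases: any ip inside the mapping, in any mode, is covered by the same test.
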
A macro SYSCALL_ENTER_KERNEL was defined in sigreturn.S, with the ability to override it. The override capability, however, is not used anywhere, and the macro name is potentially confusing because it seems to imply that sysenter/syscall could be used here, which is NOT true: the sigreturn system calls MUST use int $0x80. Signed-off-by: H. Peter Anvin (Intel) --- arch/x86/entry/vdso/vdso32/sigreturn.S | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/x86/entry/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S index 1bd068f72d4c..965900c6763b 100644 --- a/arch/x86/entry/vdso/vdso32/sigreturn.S +++ b/arch/x86/entry/vdso/vdso32/sigreturn.S @@ -3,10 +3,6 @@ #include #include -#ifndef SYSCALL_ENTER_KERNEL -#define SYSCALL_ENTER_KERNEL int $0x80 -#endif - .text .globl __kernel_sigreturn .type __kernel_sigreturn,@function @@ -16,7 +12,7 @@ __kernel_sigreturn: .LSTART_sigreturn: popl %eax /* XXX does this mean it needs unwind info? */ movl $__NR_sigreturn, %eax - SYSCALL_ENTER_KERNEL + int $0x80 .LEND_sigreturn: SYM_INNER_LABEL(vdso32_sigreturn_landing_pad, SYM_L_GLOBAL) nop @@ -28,7 +24,7 @@ SYM_INNER_LABEL(vdso32_sigreturn_landing_pad, SYM_L_GLOBAL) __kernel_rt_sigreturn: .LSTART_rt_sigreturn: movl $__NR_rt_sigreturn, %eax - SYSCALL_ENTER_KERNEL + int $0x80 .LEND_rt_sigreturn: SYM_INNER_LABEL(vdso32_rt_sigreturn_landing_pad, SYM_L_GLOBAL) nop -- 2.51.1 The vdso32 sigreturn.S contains open-coded DWARF bytecode, which includes a hack for gdb to not try to step back to a previous call instruction when backtracing from a signal handler. Neither of those is necessary anymore: the backtracing issue is handled by ".cfi_startproc simple" and ".cfi_signal_frame", both of which have been supported for a very long time now, which allows the remaining frame to be built using regular .cfi annotations. Add a few more register offsets to the signal frame just for good measure. Replace the nop on fallthrough of the system call (which should never, ever happen) with a ud2a trap. Signed-off-by: H.
Peter Anvin (Intel) --- arch/x86/entry/vdso/vdso32/sigreturn.S | 146 ++++++------------------- arch/x86/include/asm/dwarf2.h | 1 + arch/x86/kernel/asm-offsets.c | 6 + 3 files changed, 39 insertions(+), 114 deletions(-) diff --git a/arch/x86/entry/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S index 965900c6763b..25b0ac4b4bfe 100644 --- a/arch/x86/entry/vdso/vdso32/sigreturn.S +++ b/arch/x86/entry/vdso/vdso32/sigreturn.S @@ -1,136 +1,54 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include #include +#include #include +.macro STARTPROC_SIGNAL_FRAME sc + CFI_STARTPROC simple + CFI_SIGNAL_FRAME + /* -4 as pretcode has already been popped */ + CFI_DEF_CFA esp, \sc - 4 + CFI_OFFSET eip, IA32_SIGCONTEXT_ip + CFI_OFFSET eax, IA32_SIGCONTEXT_ax + CFI_OFFSET ebx, IA32_SIGCONTEXT_bx + CFI_OFFSET ecx, IA32_SIGCONTEXT_cx + CFI_OFFSET edx, IA32_SIGCONTEXT_dx + CFI_OFFSET esp, IA32_SIGCONTEXT_sp + CFI_OFFSET ebp, IA32_SIGCONTEXT_bp + CFI_OFFSET esi, IA32_SIGCONTEXT_si + CFI_OFFSET edi, IA32_SIGCONTEXT_di + CFI_OFFSET es, IA32_SIGCONTEXT_es + CFI_OFFSET cs, IA32_SIGCONTEXT_cs + CFI_OFFSET ss, IA32_SIGCONTEXT_ss + CFI_OFFSET ds, IA32_SIGCONTEXT_ds + CFI_OFFSET eflags, IA32_SIGCONTEXT_flags +.endm + .text .globl __kernel_sigreturn .type __kernel_sigreturn,@function - nop /* this guy is needed for .LSTARTFDEDLSI1 below (watch for HACK) */ ALIGN __kernel_sigreturn: -.LSTART_sigreturn: - popl %eax /* XXX does this mean it needs unwind info? */ + STARTPROC_SIGNAL_FRAME IA32_SIGFRAME_sigcontext + popl %eax + CFI_ADJUST_CFA_OFFSET -4 movl $__NR_sigreturn, %eax int $0x80 -.LEND_sigreturn: SYM_INNER_LABEL(vdso32_sigreturn_landing_pad, SYM_L_GLOBAL) - nop - .size __kernel_sigreturn,.-.LSTART_sigreturn + ud2a + CFI_ENDPROC + .size __kernel_sigreturn,.-__kernel_sigreturn .globl __kernel_rt_sigreturn .type __kernel_rt_sigreturn,@function ALIGN __kernel_rt_sigreturn: -.LSTART_rt_sigreturn: + STARTPROC_SIGNAL_FRAME IA32_RT_SIGFRAME_sigcontext movl $__NR_rt_sigreturn, %eax int $0x80 -.LEND_rt_sigreturn: SYM_INNER_LABEL(vdso32_rt_sigreturn_landing_pad, SYM_L_GLOBAL) - nop - .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn - .previous - - .section .eh_frame,"a",@progbits -.LSTARTFRAMEDLSI1: - .long .LENDCIEDLSI1-.LSTARTCIEDLSI1 -.LSTARTCIEDLSI1: - .long 0 /* CIE ID */ - .byte 1 /* Version number */ - .string "zRS" /* NUL-terminated augmentation string */ - .uleb128 1 /* Code alignment factor */ - .sleb128 -4 /* Data alignment factor */ - .byte 8 /* Return address register column */ - .uleb128 1 /* Augmentation value length */ - .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ - .byte 0 /* DW_CFA_nop */ - .align 4 -.LENDCIEDLSI1: - .long .LENDFDEDLSI1-.LSTARTFDEDLSI1 /* Length FDE */ -.LSTARTFDEDLSI1: - .long .LSTARTFDEDLSI1-.LSTARTFRAMEDLSI1 /* CIE pointer */ - /* HACK: The dwarf2 unwind routines will subtract 1 from the - return address to get an address in the middle of the - presumed call instruction. Since we didn't get here via - a call, we need to include the nop before the real start - to make up for it. */ - .long .LSTART_sigreturn-1-. /* PC-relative start address */ - .long .LEND_sigreturn-.LSTART_sigreturn+1 - .uleb128 0 /* Augmentation */ - /* What follows are the instructions for the table generation. - We record the locations of each register saved. This is - complicated by the fact that the "CFA" is always assumed to - be the value of the stack pointer in the caller. This means - that we must define the CFA of this body of code to be the - saved value of the stack pointer in the sigcontext. 
Which - also means that there is no fixed relation to the other - saved registers, which means that we must use DW_CFA_expression - to compute their addresses. It also means that when we - adjust the stack with the popl, we have to do it all over again. */ - -#define do_cfa_expr(offset) \ - .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ - .uleb128 1f-0f; /* length */ \ -0: .byte 0x74; /* DW_OP_breg4 */ \ - .sleb128 offset; /* offset */ \ - .byte 0x06; /* DW_OP_deref */ \ -1: - -#define do_expr(regno, offset) \ - .byte 0x10; /* DW_CFA_expression */ \ - .uleb128 regno; /* regno */ \ - .uleb128 1f-0f; /* length */ \ -0: .byte 0x74; /* DW_OP_breg4 */ \ - .sleb128 offset; /* offset */ \ -1: - - do_cfa_expr(IA32_SIGCONTEXT_sp+4) - do_expr(0, IA32_SIGCONTEXT_ax+4) - do_expr(1, IA32_SIGCONTEXT_cx+4) - do_expr(2, IA32_SIGCONTEXT_dx+4) - do_expr(3, IA32_SIGCONTEXT_bx+4) - do_expr(5, IA32_SIGCONTEXT_bp+4) - do_expr(6, IA32_SIGCONTEXT_si+4) - do_expr(7, IA32_SIGCONTEXT_di+4) - do_expr(8, IA32_SIGCONTEXT_ip+4) - - .byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */ - - do_cfa_expr(IA32_SIGCONTEXT_sp) - do_expr(0, IA32_SIGCONTEXT_ax) - do_expr(1, IA32_SIGCONTEXT_cx) - do_expr(2, IA32_SIGCONTEXT_dx) - do_expr(3, IA32_SIGCONTEXT_bx) - do_expr(5, IA32_SIGCONTEXT_bp) - do_expr(6, IA32_SIGCONTEXT_si) - do_expr(7, IA32_SIGCONTEXT_di) - do_expr(8, IA32_SIGCONTEXT_ip) - - .align 4 -.LENDFDEDLSI1: - - .long .LENDFDEDLSI2-.LSTARTFDEDLSI2 /* Length FDE */ -.LSTARTFDEDLSI2: - .long .LSTARTFDEDLSI2-.LSTARTFRAMEDLSI1 /* CIE pointer */ - /* HACK: See above wrt unwind library assumptions. */ - .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */ - .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1 - .uleb128 0 /* Augmentation */ - /* What follows are the instructions for the table generation. - We record the locations of each register saved. This is - slightly less complicated than the above, since we don't - modify the stack pointer in the process. 
*/ - - do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_sp) - do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ax) - do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_cx) - do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_dx) - do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bx) - do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bp) - do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_si) - do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_di) - do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ip) - - .align 4 -.LENDFDEDLSI2: + ud2a + CFI_ENDPROC + .size __kernel_rt_sigreturn,.-__kernel_rt_sigreturn .previous diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index 302e11b15da8..09c9684d3ad6 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -20,6 +20,7 @@ #define CFI_RESTORE_STATE .cfi_restore_state #define CFI_UNDEFINED .cfi_undefined #define CFI_ESCAPE .cfi_escape +#define CFI_SIGNAL_FRAME .cfi_signal_frame #ifndef BUILD_VDSO /* diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 32ba599a51f8..a7e7df837405 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -63,8 +63,14 @@ static void __used common(void) OFFSET(IA32_SIGCONTEXT_bp, sigcontext_32, bp); OFFSET(IA32_SIGCONTEXT_sp, sigcontext_32, sp); OFFSET(IA32_SIGCONTEXT_ip, sigcontext_32, ip); + OFFSET(IA32_SIGCONTEXT_es, sigcontext_32, es); + OFFSET(IA32_SIGCONTEXT_cs, sigcontext_32, cs); + OFFSET(IA32_SIGCONTEXT_ss, sigcontext_32, ss); + OFFSET(IA32_SIGCONTEXT_ds, sigcontext_32, ds); + OFFSET(IA32_SIGCONTEXT_flags, sigcontext_32, flags); BLANK(); + OFFSET(IA32_SIGFRAME_sigcontext, sigframe_ia32, sc); OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); #endif -- 2.51.1 Currently the vdso doesn't include .note.gnu.property or a GNU noexec stack annotation (the -z noexecstack linker flag is ineffective because we specify PHDRs explicitly). The motivation is that the dynamic linker currently does not check these. However, this is a weak excuse: the vdso*.so are also supposed to be usable as link libraries, and there is no reason why the dynamic linker might not want or need to check these in the future, so add them back in -- it is trivial enough. Signed-off-by: H. Peter Anvin (Intel) --- arch/x86/entry/vdso/common/vdso-layout.lds.S | 51 +++++++++++--------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/arch/x86/entry/vdso/common/vdso-layout.lds.S b/arch/x86/entry/vdso/common/vdso-layout.lds.S index ec1ac191a057..696bacb99ed8 100644 --- a/arch/x86/entry/vdso/common/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/common/vdso-layout.lds.S @@ -26,7 +26,7 @@ SECTIONS . = SIZEOF_HEADERS; - .hash : { *(.hash) } :text + .hash : { *(.hash) } :text .gnu.hash : { *(.gnu.hash) } .dynsym : { *(.dynsym) } .dynstr : { *(.dynstr) } @@ -34,7 +34,7 @@ SECTIONS .gnu.version_d : { *(.gnu.version_d) } .gnu.version_r : { *(.gnu.version_r) } - .dynamic : { *(.dynamic) } :text :dynamic + .dynamic : { *(.dynamic) } :text :dynamic .rodata : { *(.rodata*) @@ -45,31 +45,28 @@ SECTIONS *(.bss*) *(.dynbss*) *(.gnu.linkonce.b.*) - } :text + } :text - /* - * Discard .note.gnu.property sections which are unused and have - * different alignment requirement from vDSO note sections. - */ - /DISCARD/ : { + .note.gnu.property : { *(.note.gnu.property) - } - .note : { *(.note.*) } :text :note - - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text + } :text :note :gnu_property + .note : { + *(.note*) + } :text :note + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { + KEEP (*(.eh_frame)) + *(.eh_frame.*) + } :text /* * Text is well-separated from actual data: there's plenty of * stuff that isn't used at runtime in between. - */ - + */ .text : { *(.text*) - } :text =0x90909090, - - + } :text =0x90909090 .altinstructions : { *(.altinstructions) } :text .altinstr_replacement : { *(.altinstr_replacement) } :text @@ -87,15 +84,23 @@ SECTIONS * Very old versions of ld do not recognize this name token; use the constant. */ #define PT_GNU_EH_FRAME 0x6474e550 +#define PT_GNU_STACK 0x6474e551 +#define PT_GNU_PROPERTY 0x6474e553 /* * We must supply the ELF program headers explicitly to get just one * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ +*/ +#define PF_R FLAGS(4) +#define PF_RW FLAGS(6) +#define PF_RX FLAGS(5) + PHDRS { - text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ - eh_frame_hdr PT_GNU_EH_FRAME; + text PT_LOAD PF_RX FILEHDR PHDRS; + dynamic PT_DYNAMIC PF_R; + note PT_NOTE PF_R; + eh_frame_hdr PT_GNU_EH_FRAME PF_R; + gnu_stack PT_GNU_STACK PF_RW; + gnu_property PT_GNU_PROPERTY PF_R; } -- 2.51.1
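One way to sanity-check the result is to walk the program headers of a built image and confirm that PT_GNU_STACK is present without PF_X and that PT_GNU_PROPERTY shows up. A rough stand-alone checker, not part of the series (the default file name is just an example, and it assumes a 64-bit image; older elf.h may lack the PT_GNU_PROPERTY name, hence the fallback define):

#include <elf.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#ifndef PT_GNU_PROPERTY
#define PT_GNU_PROPERTY 0x6474e553	/* same constant the linker script uses */
#endif

int main(int argc, char **argv)
{
	const char *name = argc > 1 ? argv[1] : "vdso64.so";	/* example path */
	struct stat st;
	int fd = open(name, O_RDONLY);

	if (fd < 0 || fstat(fd, &st) < 0) {
		perror(name);
		return 1;
	}

	Elf64_Ehdr *eh = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (eh == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	Elf64_Phdr *ph = (Elf64_Phdr *)((char *)eh + eh->e_phoff);
	for (int i = 0; i < eh->e_phnum; i++) {
		if (ph[i].p_type == PT_GNU_STACK)
			printf("PT_GNU_STACK flags=%#x (PF_X must be clear)\n",
			       ph[i].p_flags);
		if (ph[i].p_type == PT_GNU_PROPERTY)
			printf("PT_GNU_PROPERTY present\n");
	}
	return 0;
}

readelf -lW on the image gives the same information; the sketch just shows where the new headers end up.
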
Abstract out the calling of true system calls from the vdso into macros. It has been a very long time since gcc disallowed %ebx or %ebp in inline asm in 32-bit PIC mode; remove the corresponding hacks. Remove the use of memory output constraints in gettimeofday.h in favor of "memory" clobbers. The resulting code is identical for the current use cases, as the system call is usually a terminal fallback anyway, and the memory output constraints merely complicate the macroization. This patch adds only a handful more lines of code than it removes, and in fact could be made substantially smaller by removing the macros for the argument counts that aren't currently used; however, it seems better to be general from the start. Signed-off-by: H.
Peter Anvin (Intel) --- arch/x86/include/asm/vdso/gettimeofday.h | 108 ++------------------ arch/x86/include/asm/vdso/sys_call.h | 119 +++++++++++++++++++++++ 2 files changed, 127 insertions(+), 100 deletions(-) create mode 100644 arch/x86/include/asm/vdso/sys_call.h diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 73b2e7ee8f0f..3cf214cc4a75 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -18,6 +18,7 @@ #include #include #include +#include #define VDSO_HAS_TIME 1 @@ -53,130 +54,37 @@ extern struct ms_hyperv_tsc_page hvclock_page __attribute__((visibility("hidden"))); #endif -#ifndef BUILD_VDSO32 - static __always_inline long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) { - long ret; - - asm ("syscall" : "=a" (ret), "=m" (*_ts) : - "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) : - "rcx", "r11"); - - return ret; + return VDSO_SYSCALL2(clock_gettime,64,_clkid,_ts); } static __always_inline long gettimeofday_fallback(struct __kernel_old_timeval *_tv, struct timezone *_tz) { - long ret; - - asm("syscall" : "=a" (ret) : - "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory"); - - return ret; + return VDSO_SYSCALL2(gettimeofday,,_tv,_tz); } static __always_inline long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) { - long ret; - - asm ("syscall" : "=a" (ret), "=m" (*_ts) : - "0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) : - "rcx", "r11"); - - return ret; + return VDSO_SYSCALL2(clock_getres,_time64,_clkid,_ts); } -#else - -static __always_inline -long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) -{ - long ret; - - asm ( - "mov %%ebx, %%edx \n" - "mov %[clock], %%ebx \n" - "call __kernel_vsyscall \n" - "mov %%edx, %%ebx \n" - : "=a" (ret), "=m" (*_ts) - : "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts) - : "edx"); - - return ret; -} +#ifndef CONFIG_X86_64 static __always_inline long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) { - long ret; - - asm ( - "mov %%ebx, %%edx \n" - "mov %[clock], %%ebx \n" - "call __kernel_vsyscall \n" - "mov %%edx, %%ebx \n" - : "=a" (ret), "=m" (*_ts) - : "0" (__NR_clock_gettime), [clock] "g" (_clkid), "c" (_ts) - : "edx"); - - return ret; -} - -static __always_inline -long gettimeofday_fallback(struct __kernel_old_timeval *_tv, - struct timezone *_tz) -{ - long ret; - - asm( - "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" - "call __kernel_vsyscall \n" - "mov %%edx, %%ebx \n" - : "=a" (ret) - : "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz) - : "memory", "edx"); - - return ret; + return VDSO_SYSCALL2(clock_gettime,,_clkid,_ts); } static __always_inline long -clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) -{ - long ret; - - asm ( - "mov %%ebx, %%edx \n" - "mov %[clock], %%ebx \n" - "call __kernel_vsyscall \n" - "mov %%edx, %%ebx \n" - : "=a" (ret), "=m" (*_ts) - : "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts) - : "edx"); - - return ret; -} - -static __always_inline -long clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) { - long ret; - - asm ( - "mov %%ebx, %%edx \n" - "mov %[clock], %%ebx \n" - "call __kernel_vsyscall \n" - "mov %%edx, %%ebx \n" - : "=a" (ret), "=m" (*_ts) - : "0" (__NR_clock_getres), [clock] "g" (_clkid), "c" (_ts) - : "edx"); - - return ret; + return VDSO_SYSCALL2(clock_getres,,_clkid,_ts); 
} #endif diff --git a/arch/x86/include/asm/vdso/sys_call.h b/arch/x86/include/asm/vdso/sys_call.h new file mode 100644 index 000000000000..6b1fbcdcbd5c --- /dev/null +++ b/arch/x86/include/asm/vdso/sys_call.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Macros for issuing an inline system call from the vDSO. + */ + +#ifndef X86_ASM_VDSO_SYS_CALL_H +#define X86_ASM_VDSO_SYS_CALL_H + +#include +#include +#include + +/* + * Note: there are no constraint letters for r10, r8, r9, so the + * arguments are bound to their registers with explicit register + * variables instead. + */ +#ifdef CONFIG_X86_64 +/* Using dummy output registers instead of clobbers avoids messing up + user-specified clobbers. */ +#define __sys_instr "syscall" +#define __sys_clobber "rcx", "r11", "memory" +#define __sys_nr(x,y) __NR_ ## x +#define __sys_reg1 "rdi" +#define __sys_reg2 "rsi" +#define __sys_reg3 "rdx" +#define __sys_reg4 "r10" +#define __sys_reg5 "r8" +#define __sys_reg6 "r9" +#else +#define __sys_instr "call __kernel_vsyscall" +#define __sys_clobber "memory" +#define __sys_nr(x,y) __NR_ ## x ## y +#define __sys_reg1 "ebx" +#define __sys_reg2 "ecx" +#define __sys_reg3 "edx" +#define __sys_reg4 "esi" +#define __sys_reg5 "edi" +#define __sys_reg6 "ebp" +#endif + +/* + * Example usage: + * + * result = VDSO_SYSCALL3(foo,64,x,y,z); + * + * ... calls foo(x,y,z) on 64 bits, and foo64(x,y,z) on 32 bits. + */ +#define _VDSO_SYSCALL(name,suf32,...) \ + ({ \ + long _sys_num_ret = __sys_nr(name,suf32); \ + asm_inline volatile( \ + __sys_instr \ + : "+a" (_sys_num_ret) \ + : __VA_ARGS__ \ + : __sys_clobber); \ + _sys_num_ret; \ + }) + +#define VDSO_SYSCALL0(name,suf32) \ + _VDSO_SYSCALL(name,suf32) #define VDSO_SYSCALL1(name,suf32,a1) \ + ({ \ + register long _sys_arg1 asm(__sys_reg1) = (long)(a1); \ + _VDSO_SYSCALL(name,suf32, \ + "r" (_sys_arg1)); \ + }) #define VDSO_SYSCALL2(name,suf32,a1,a2) \ + ({ \ + register long _sys_arg1 asm(__sys_reg1) = (long)(a1); \ + register long _sys_arg2 asm(__sys_reg2) = (long)(a2); \ + _VDSO_SYSCALL(name,suf32, \ + "r" (_sys_arg1), "r" (_sys_arg2)); \ + }) #define VDSO_SYSCALL3(name,suf32,a1,a2,a3) \ + ({ \ + register long _sys_arg1 asm(__sys_reg1) = (long)(a1); \ + register long _sys_arg2 asm(__sys_reg2) = (long)(a2); \ + register long _sys_arg3 asm(__sys_reg3) = (long)(a3); \ + _VDSO_SYSCALL(name,suf32, \ + "r" (_sys_arg1), "r" (_sys_arg2), \ + "r" (_sys_arg3)); \ + }) #define VDSO_SYSCALL4(name,suf32,a1,a2,a3,a4) \ + ({ \ + register long _sys_arg1 asm(__sys_reg1) = (long)(a1); \ + register long _sys_arg2 asm(__sys_reg2) = (long)(a2); \ + register long _sys_arg3 asm(__sys_reg3) = (long)(a3); \ + register long _sys_arg4 asm(__sys_reg4) = (long)(a4); \ + _VDSO_SYSCALL(name,suf32, \ + "r" (_sys_arg1), "r" (_sys_arg2), \ + "r" (_sys_arg3), "r" (_sys_arg4)); \ + }) #define VDSO_SYSCALL5(name,suf32,a1,a2,a3,a4,a5) \ + ({ \ + register long _sys_arg1 asm(__sys_reg1) = (long)(a1); \ + register long _sys_arg2 asm(__sys_reg2) = (long)(a2); \ + register long _sys_arg3 asm(__sys_reg3) = (long)(a3); \ + register long _sys_arg4 asm(__sys_reg4) = (long)(a4); \ + register long _sys_arg5 asm(__sys_reg5) = (long)(a5); \ + _VDSO_SYSCALL(name,suf32, \ + "r" (_sys_arg1), "r" (_sys_arg2), \ + "r" (_sys_arg3), "r" (_sys_arg4), \ + "r" (_sys_arg5)); \ + }) #define VDSO_SYSCALL6(name,suf32,a1,a2,a3,a4,a5,a6) \ + ({ \ + register long _sys_arg1 asm(__sys_reg1) = (long)(a1); \ + register long _sys_arg2 asm(__sys_reg2) = (long)(a2); \ + register long _sys_arg3 asm(__sys_reg3) = (long)(a3); \ + register long _sys_arg4 asm(__sys_reg4) = (long)(a4); \ + register long _sys_arg5 asm(__sys_reg5) = (long)(a5); \ + register long _sys_arg6 asm(__sys_reg6) = (long)(a6); \ + _VDSO_SYSCALL(name,suf32, \ + "r" (_sys_arg1), "r" (_sys_arg2), \ + "r" (_sys_arg3), "r" (_sys_arg4), \ + "r" (_sys_arg5), "r" (_sys_arg6)); \ + }) + +#endif /* X86_ASM_VDSO_SYS_CALL_H */ -- 2.51.1
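The register-binding trick the VDSO_SYSCALL* macros rely on can also be seen in isolation in plain user space: a register asm variable pins each argument to the register the ABI expects, and a single asm statement issues the syscall. A stand-alone x86-64 sketch (illustration only; demo_syscall2 is a made-up name, not part of the header above):

#include <sys/syscall.h>
#include <unistd.h>

/* Two-argument syscall, mirroring what VDSO_SYSCALL2 expands to on 64 bits. */
static long demo_syscall2(long nr, long a1, long a2)
{
	register long arg1 asm("rdi") = a1;	/* first syscall argument */
	register long arg2 asm("rsi") = a2;	/* second syscall argument */
	long ret = nr;				/* rax: number in, result out */

	asm volatile("syscall"
		     : "+a" (ret)
		     : "r" (arg1), "r" (arg2)
		     : "rcx", "r11", "memory");	/* syscall clobbers rcx/r11 */
	return ret;
}

int main(void)
{
	/* kill(getpid(), 0) merely probes the pid; expect 0 on success */
	return demo_syscall2(SYS_kill, getpid(), 0) == 0 ? 0 : 1;
}

On 32 bits the same shape works with ebx/ecx/... and a call to __kernel_vsyscall, which is exactly what the #else branch of the header encodes.
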
In most cases, the use of "fast 32-bit system call" depends on either X86_FEATURE_SEP or X86_FEATURE_SYSENTER32 || X86_FEATURE_SYSCALL32. However, nearly all the logic for both is identical. Define X86_FEATURE_SYSFAST32, which indicates that *either* SYSENTER32 or SYSCALL32 should be used, for either 32- or 64-bit kernels. This defaults to SYSENTER; use SYSCALL if the SYSCALL32 bit is also set. As this removes ALL existing uses of X86_FEATURE_SYSENTER32, which is a kernel-only synthetic feature bit, simply remove it and replace it with X86_FEATURE_SYSFAST32. This leaves an unused alternative for a true 32-bit kernel, but that should really not matter in any way. The clearing of X86_FEATURE_SYSCALL32 can be removed once the patches for automatically clearing disabled features have been merged. Signed-off-by: H. Peter Anvin (Intel) --- arch/x86/Kconfig.cpufeatures | 8 +++++++ arch/x86/entry/vdso/vdso32/system_call.S | 8 ++----- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kernel/cpu/centaur.c | 3 --- arch/x86/kernel/cpu/common.c | 8 +++++++ arch/x86/kernel/cpu/intel.c | 4 +--- arch/x86/kernel/cpu/zhaoxin.c | 4 +--- arch/x86/kernel/fred.c | 2 +- arch/x86/xen/setup.c | 28 +++++++++++++++--------- arch/x86/xen/smp_pv.c | 5 ++--- arch/x86/xen/xen-ops.h | 1 - 11 files changed, 42 insertions(+), 31 deletions(-) diff --git a/arch/x86/Kconfig.cpufeatures b/arch/x86/Kconfig.cpufeatures index 250c10627ab3..2808b8aee8df 100644 --- a/arch/x86/Kconfig.cpufeatures +++ b/arch/x86/Kconfig.cpufeatures @@ -56,6 +56,10 @@ config X86_REQUIRED_FEATURE_MOVBE def_bool y depends on MATOM +config X86_REQUIRED_FEATURE_SYSFAST32 + def_bool y + depends on X86_64 && !X86_FRED + config X86_REQUIRED_FEATURE_CPUID def_bool y depends on X86_64 @@ -120,6 +124,10 @@ config X86_DISABLED_FEATURE_CENTAUR_MCR def_bool y depends on X86_64 +config X86_DISABLED_FEATURE_SYSCALL32 + def_bool y + depends on !X86_64 + config X86_DISABLED_FEATURE_PCID def_bool y depends on !X86_64 diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index 2a15634bbe75..7b1c0f16e511 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -52,7 +52,7 @@ __kernel_vsyscall: #define SYSENTER_SEQUENCE "movl %esp, %ebp; sysenter" #define SYSCALL_SEQUENCE "movl %ecx, %ebp; syscall" -#ifdef BUILD_VDSO32_64 /* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it.
*/ - ALTERNATIVE_2 "", SYSENTER_SEQUENCE, X86_FEATURE_SYSENTER32, \ - SYSCALL_SEQUENCE, X86_FEATURE_SYSCALL32 -#else - ALTERNATIVE "", SYSENTER_SEQUENCE, X86_FEATURE_SEP -#endif + ALTERNATIVE_2 "", SYSENTER_SEQUENCE, X86_FEATURE_SYSFAST32, \ + SYSCALL_SEQUENCE, X86_FEATURE_SYSCALL32 /* Enter using int $0x80 */ int $0x80 diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4091a776e37a..f9d1c404b750 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -84,7 +84,7 @@ #define X86_FEATURE_PEBS ( 3*32+12) /* "pebs" Precise-Event Based Sampling */ #define X86_FEATURE_BTS ( 3*32+13) /* "bts" Branch Trace Store */ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* syscall in IA32 userspace */ -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* sysenter in IA32 userspace */ +#define X86_FEATURE_SYSFAST32 ( 3*32+15) /* sysenter/syscall in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* "rep_good" REP microcode works well */ #define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* "amd_lbr_v2" AMD Last Branch Record Extension Version 2 */ #define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* Clear CPU buffers using VERW */ diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index a3b55db35c96..9833f837141c 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -102,9 +102,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c) (c->x86 >= 7)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSENTER32); -#endif if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 02d97834a1d4..25af63f0c449 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1049,6 +1049,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c) init_scattered_cpuid_features(c); init_speculation_control(c); + if (IS_ENABLED(CONFIG_X86_64) || cpu_has(c, X86_FEATURE_SEP)) + set_cpu_cap(c, X86_FEATURE_SYSFAST32); + /* * Clear/Set all flags overridden by options, after probe. * This needs to happen each time we re-probe, which may happen @@ -1794,6 +1797,11 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) * that it can't be enabled in 32-bit mode. 
*/ setup_clear_cpu_cap(X86_FEATURE_PCID); + + /* + * Never use SYSCALL on a 32-bit kernel + */ + setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); #endif /* diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 98ae4c37c93e..646ff33c4651 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -236,9 +236,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) clear_cpu_cap(c, X86_FEATURE_PSE); } -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSENTER32); -#else +#ifndef CONFIG_X86_64 /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ if (c->x86 == 15 && c->x86_cache_alignment == 64) c->x86_cache_alignment = 128; diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c index 89b1c8a70fe8..031379b7d4fa 100644 --- a/arch/x86/kernel/cpu/zhaoxin.c +++ b/arch/x86/kernel/cpu/zhaoxin.c @@ -59,9 +59,7 @@ static void early_init_zhaoxin(struct cpuinfo_x86 *c) { if (c->x86 >= 0x6) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSENTER32); -#endif + if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); diff --git a/arch/x86/kernel/fred.c b/arch/x86/kernel/fred.c index 816187da3a47..e736b19e18de 100644 --- a/arch/x86/kernel/fred.c +++ b/arch/x86/kernel/fred.c @@ -68,7 +68,7 @@ void cpu_init_fred_exceptions(void) idt_invalidate(); /* Use int $0x80 for 32-bit system calls in FRED mode */ - setup_clear_cpu_cap(X86_FEATURE_SYSENTER32); + setup_clear_cpu_cap(X86_FEATURE_SYSFAST32); setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); } diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 3823e52aef52..ac8021c3a997 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -990,13 +990,6 @@ static int register_callback(unsigned type, const void *func) return HYPERVISOR_callback_op(CALLBACKOP_register, &callback); } -void xen_enable_sysenter(void) -{ - if (cpu_feature_enabled(X86_FEATURE_SYSENTER32) && - register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat)) - setup_clear_cpu_cap(X86_FEATURE_SYSENTER32); -} - void xen_enable_syscall(void) { int ret; @@ -1008,11 +1001,27 @@ void xen_enable_syscall(void) mechanism for syscalls. */ } - if (cpu_feature_enabled(X86_FEATURE_SYSCALL32) && - register_callback(CALLBACKTYPE_syscall32, xen_entry_SYSCALL_compat)) + if (!cpu_feature_enabled(X86_FEATURE_SYSFAST32)) + return; + + if (cpu_feature_enabled(X86_FEATURE_SYSCALL32)) { + /* Use SYSCALL32 */ + ret = register_callback(CALLBACKTYPE_syscall32, + xen_entry_SYSCALL_compat); + + } else { + /* Use SYSENTER32 */ + ret = register_callback(CALLBACKTYPE_sysenter, + xen_entry_SYSENTER_compat); + } + + if (ret) { setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); + setup_clear_cpu_cap(X86_FEATURE_SYSFAST32); + } } + static void __init xen_pvmmu_arch_setup(void) { HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); @@ -1022,7 +1031,6 @@ static void __init xen_pvmmu_arch_setup(void) register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) BUG(); - xen_enable_sysenter(); xen_enable_syscall(); } diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 9bb8ff8bff30..c40f326f0c3a 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -65,10 +65,9 @@ static void cpu_bringup(void) touch_softlockup_watchdog(); /* PVH runs in ring 0 and allows us to do native syscalls. Yay! 
*/ - if (!xen_feature(XENFEAT_supervisor_mode_kernel)) { - xen_enable_sysenter(); + if (!xen_feature(XENFEAT_supervisor_mode_kernel)) xen_enable_syscall(); - } + cpu = smp_processor_id(); identify_secondary_cpu(cpu); set_cpu_sibling_map(cpu); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 090349baec09..f6c331b20fad 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -60,7 +60,6 @@ phys_addr_t __init xen_find_free_area(phys_addr_t size); char * __init xen_memory_setup(void); void __init xen_arch_setup(void); void xen_banner(void); -void xen_enable_sysenter(void); void xen_enable_syscall(void); void xen_vcpu_restore(void); -- 2.51.1 When neither sysenter32 nor syscall32 is available (on either FRED-capable 64-bit hardware or old 32-bit hardware), there is no reason to do a bunch of stack shuffling in __kernel_vsyscall. Unfortunately, just overwriting the initial "push" instructions would mess up the CFI annotations, so we suffer a 3-byte NOP instead when no fast system call is available. Similarly, issue int $0x80 directly for inline system calls within the vdso itself instead of calling __kernel_vsyscall. Signed-off-by: H. Peter Anvin (Intel) --- arch/x86/entry/vdso/vdso32/system_call.S | 18 ++++++++++++++---- arch/x86/include/asm/vdso/sys_call.h | 4 +++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index 7b1c0f16e511..9157cf9c5749 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -14,6 +14,18 @@ ALIGN __kernel_vsyscall: CFI_STARTPROC + + /* + * If using int $0x80, there is no reason to muck about with the + * stack here. Unfortunately just overwriting the push instructions + * would mess up the CFI annotations, but it is only a 3-byte + * NOP in that case. This could be avoided by patching the + * vdso symbol table (not the code) and entry point, but that + * would require a fair bit of tooling work, or by simply compiling + * two different vDSO images; neither seems worth it. + */ + ALTERNATIVE "int $0x80; ret", "", X86_FEATURE_SYSFAST32 + /* * Reshuffle regs so that all of any of the entry instructions * will preserve enough state. @@ -52,11 +64,9 @@ __kernel_vsyscall: #define SYSENTER_SEQUENCE "movl %esp, %ebp; sysenter" #define SYSCALL_SEQUENCE "movl %ecx, %ebp; syscall" - /* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */ - ALTERNATIVE_2 "", SYSENTER_SEQUENCE, X86_FEATURE_SYSFAST32, \ - SYSCALL_SEQUENCE, X86_FEATURE_SYSCALL32 + ALTERNATIVE SYSENTER_SEQUENCE, SYSCALL_SEQUENCE, X86_FEATURE_SYSCALL32 - /* Enter using int $0x80 */ + /* Re-enter using int $0x80 */ int $0x80 SYM_INNER_LABEL(int80_landing_pad, SYM_L_GLOBAL) diff --git a/arch/x86/include/asm/vdso/sys_call.h b/arch/x86/include/asm/vdso/sys_call.h index 6b1fbcdcbd5c..603ad8a83c66 100644 --- a/arch/x86/include/asm/vdso/sys_call.h +++ b/arch/x86/include/asm/vdso/sys_call.h @@ -27,7 +27,9 @@ #define __sys_reg5 "r8" #define __sys_reg6 "r9" #else -#define __sys_instr "call __kernel_vsyscall" +#define __sys_instr ALTERNATIVE("ds;ds;ds;int $0x80", \ + "call __kernel_vsyscall", \ + X86_FEATURE_SYSFAST32) #define __sys_clobber "memory" #define __sys_nr(x,y) __NR_ ## x ## y #define __sys_reg1 "ebx" -- 2.51.1
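A note on the 32-bit __sys_instr definition above: "ds" assembles to the one-byte 0x3e segment-override prefix, which is a no-op in front of int $0x80, so three of them pad the 2-byte int $0x80 out to the 5 bytes of the rel32 form of "call __kernel_vsyscall", letting the two alternatives occupy the same space without NOP padding. The register convention the __sys_reg1..__sys_reg6 macros encode in the 32-bit case is the classic int $0x80 ABI: eax carries the system call number, ebx/ecx/edx/esi/edi/ebp carry arguments 1 through 6, and the result comes back in eax. Below is a minimal userspace sketch of that convention for anyone who wants to poke at it outside the vdso; it is illustrative only and not part of this series, the sys_write3() wrapper name is made up, and it assumes a gcc -m32 build on an x86 Linux host:

#include <sys/syscall.h>	/* __NR_write */

/* Three-argument int $0x80 system call: eax = nr, ebx/ecx/edx = args 1-3 */
static long sys_write3(long fd, const void *buf, long len)
{
	long ret;

	asm volatile("int $0x80"
		     : "=a" (ret)		/* return value comes back in eax */
		     : "0" (__NR_write),	/* syscall number goes in eax */
		       "b" (fd), "c" (buf), "d" (len)
		     : "memory");		/* kernel reads/writes user memory */
	return ret;
}

int main(void)
{
	/* write(1, "hello\n", 6) via the legacy int $0x80 path */
	return sys_write3(1, "hello\n", 6) == 6 ? 0 : 1;
}

On SYSFAST32-capable hardware the patched-in call __kernel_vsyscall takes over and the vdso picks sysenter or syscall via the alternatives machinery; the int $0x80 encoding above remains the universal fallback on all 32-bit-capable kernels.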