37c9b25886
commit afd30525a659ac0ae0904f0cb4a2ca75522c3123 upstream. USERGS_SYSRET64 is used to return from a syscall via SYSRET, but a Xen PV guest will nevertheless use the IRET hypercall, as there is no sysret PV hypercall defined. So instead of testing all the prerequisites for doing a sysret and then mangling the stack for Xen PV again for doing an iret just use the iret exit from the beginning. This can easily be done via an ALTERNATIVE like it is done for the sysenter compat case already. It should be noted that this drops the optimization in Xen for not restoring a few registers when returning to user mode, but it seems as if the saved instructions in the kernel more than compensate for this drop (a kernel build in a Xen PV guest was slightly faster with this patch applied). While at it remove the stale sysret32 remnants. [ pawan: Brad Spengler and Salvatore Bonaccorso <carnil@debian.org> reported a problem with the 5.10 backport commit edc702b4a820 ("x86/entry_64: Add VERW just before userspace transition"). When CONFIG_PARAVIRT_XXL=y, CLEAR_CPU_BUFFERS is not executed in syscall_return_via_sysret path as USERGS_SYSRET64 is runtime patched to: .cpu_usergs_sysret64 = { 0x0f, 0x01, 0xf8, 0x48, 0x0f, 0x07 }, // swapgs; sysretq which is missing CLEAR_CPU_BUFFERS. It turns out dropping USERGS_SYSRET64 simplifies the code, allowing CLEAR_CPU_BUFFERS to be explicitly added to syscall_return_via_sysret path. Below is with CONFIG_PARAVIRT_XXL=y and this patch applied: syscall_return_via_sysret: ... <+342>: swapgs <+345>: xchg %ax,%ax <+347>: verw -0x1a2(%rip) <------ <+354>: sysretq ] Signed-off-by: Juergen Gross <jgross@suse.com> Signed-off-by: Borislav Petkov <bp@suse.de> Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> Link: https://lkml.kernel.org/r/20210120135555.32594-6-jgross@suse.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
102 lines
2.9 KiB
C
Executable file
102 lines
2.9 KiB
C
Executable file
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/stringify.h>
|
|
|
|
#include <asm/paravirt.h>
|
|
#include <asm/asm-offsets.h>
|
|
|
|
/*
 * Helpers for addressing the native instruction templates stored in the
 * patch_data_<set> tables of this file.
 */

/* First byte of template <field> inside table patch_data_<set>. */
#define PSTART(set, field)						\
	patch_data_##set.field

/* One past the last byte of template <field> inside patch_data_<set>. */
#define PEND(set, field)						\
	(PSTART(set, field) + sizeof(patch_data_##set.field))

/*
 * Pass the template's [PSTART, PEND) byte range, together with the
 * destination buffer and its length, to paravirt_patch_insns() and
 * evaluate to that call's result.
 */
#define PATCH(set, field, insn_buff, len)				\
	paravirt_patch_insns(insn_buff, len,				\
			     PSTART(set, field), PEND(set, field))

/*
 * Expand to a switch case for PARAVIRT_PATCH(ops.op) which returns the
 * result of patching in template <ops>_<op> taken from patch_data_<set>.
 * Only usable directly inside a switch body of a function returning the
 * PATCH() result.
 */
#define PATCH_CASE(ops, op, set, insn_buff, len)			\
	case PARAVIRT_PATCH(ops.op):					\
		return PATCH(set, ops##_##op, insn_buff, len)
|
|
|
|
#ifdef CONFIG_PARAVIRT_XXL
|
|
/*
 * Layout of the native instruction templates used for the
 * CONFIG_PARAVIRT_XXL ops.  Each member is a raw opcode byte array whose
 * length is the exact size of the sequence; the member names follow the
 * <ops>_<op> scheme that PATCH_CASE() builds by token pasting.
 */
struct patch_xxl {
	/* irq ops */
	const unsigned char	irq_irq_disable[1];
	const unsigned char	irq_irq_enable[1];
	const unsigned char	irq_save_fl[2];

	/* mmu ops */
	const unsigned char	mmu_read_cr2[3];
	const unsigned char	mmu_read_cr3[3];
	const unsigned char	mmu_write_cr3[3];

	/* declared after the mmu members; keep the position to preserve layout */
	const unsigned char	irq_restore_fl[2];

	/* cpu ops */
	const unsigned char	cpu_wbinvd[2];

	/* template used by paravirt_patch_ident_64() */
	const unsigned char	mov64[3];
};
|
|
|
|
static const struct patch_xxl patch_data_xxl = {
|
|
.irq_irq_disable = { 0xfa }, // cli
|
|
.irq_irq_enable = { 0xfb }, // sti
|
|
.irq_save_fl = { 0x9c, 0x58 }, // pushf; pop %[re]ax
|
|
.mmu_read_cr2 = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax
|
|
.mmu_read_cr3 = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax
|
|
.mmu_write_cr3 = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3
|
|
.irq_restore_fl = { 0x57, 0x9d }, // push %rdi; popfq
|
|
.cpu_wbinvd = { 0x0f, 0x09 }, // wbinvd
|
|
.mov64 = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax
|
|
};
|
|
|
|
unsigned int paravirt_patch_ident_64(void *insn_buff, unsigned int len)
|
|
{
|
|
return PATCH(xxl, mov64, insn_buff, len);
|
|
}
|
|
# endif /* CONFIG_PARAVIRT_XXL */
|
|
|
|
#ifdef CONFIG_PARAVIRT_SPINLOCKS
|
|
/*
 * Layout of the native instruction templates used for the
 * CONFIG_PARAVIRT_SPINLOCKS ops.  Each member is a raw opcode byte array
 * sized to the exact length of its sequence; member names match the op
 * names passed to PATCH(lock, ...).
 *
 * The members are const, like those of struct patch_xxl: the templates
 * are read-only data and the only instance in this file is itself
 * declared static const.
 */
struct patch_lock {
	const unsigned char	queued_spin_unlock[3];
	const unsigned char	vcpu_is_preempted[2];
};
|
|
|
|
static const struct patch_lock patch_data_lock = {
|
|
.vcpu_is_preempted = { 0x31, 0xc0 }, // xor %eax, %eax
|
|
|
|
# ifdef CONFIG_X86_64
|
|
.queued_spin_unlock = { 0xc6, 0x07, 0x00 }, // movb $0, (%rdi)
|
|
# else
|
|
.queued_spin_unlock = { 0xc6, 0x00, 0x00 }, // movb $0, (%eax)
|
|
# endif
|
|
};
|
|
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
|
|
|
|
/*
 * native_patch - patch a paravirt call site with a native template.
 * @type:      patch type, matched against the PARAVIRT_PATCH() values below
 * @insn_buff: buffer handed through to the patching helpers
 * @addr:      only forwarded to paravirt_patch_default()
 * @len:       length value handed through to the patching helpers
 *
 * When @type names an op that has a native template in this file, the
 * template is handed to paravirt_patch_insns() via PATCH() and that
 * result is returned.  The two spinlock ops are only patched this way
 * when their pv_is_native_*() check returns true.  Every other case is
 * delegated to paravirt_patch_default().
 */
unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr,
			  unsigned int len)
{
	switch (type) {
#ifdef CONFIG_PARAVIRT_XXL
	/* Each PATCH_CASE() expands to a case label that returns directly. */
	PATCH_CASE(irq, irq_disable, xxl, insn_buff, len);
	PATCH_CASE(irq, irq_enable, xxl, insn_buff, len);
	PATCH_CASE(irq, save_fl, xxl, insn_buff, len);
	PATCH_CASE(irq, restore_fl, xxl, insn_buff, len);

	PATCH_CASE(mmu, read_cr2, xxl, insn_buff, len);
	PATCH_CASE(mmu, read_cr3, xxl, insn_buff, len);
	PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len);

	PATCH_CASE(cpu, wbinvd, xxl, insn_buff, len);
#endif /* CONFIG_PARAVIRT_XXL */

#ifdef CONFIG_PARAVIRT_SPINLOCKS
	case PARAVIRT_PATCH(lock.vcpu_is_preempted):
		/* Check failed: leave the switch and use the default path. */
		if (!pv_is_native_vcpu_is_preempted())
			break;
		return PATCH(lock, vcpu_is_preempted, insn_buff, len);

	case PARAVIRT_PATCH(lock.queued_spin_unlock):
		/* Check failed: leave the switch and use the default path. */
		if (!pv_is_native_spin_unlock())
			break;
		return PATCH(lock, queued_spin_unlock, insn_buff, len);
#endif /* CONFIG_PARAVIRT_SPINLOCKS */

	default:
		break;
	}

	/* No native template applied for this type. */
	return paravirt_patch_default(type, insn_buff, addr, len);
}
|