![Pawan Gupta](/assets/img/avatar_default.png)
commit 3c7501722e6b31a6e56edd23cea5e77dbb9ffd1a upstream. Mitigation for MDS is to use VERW instruction to clear any secrets in CPU Buffers. Any memory accesses after VERW execution can still remain in CPU buffers. It is safer to execute VERW late in return to user path to minimize the window in which kernel data can end up in CPU buffers. There are not many kernel secrets to be had after SWITCH_TO_USER_CR3. Add support for deploying VERW mitigation after user register state is restored. This helps minimize the chances of kernel data ending up into CPU buffers after executing VERW. Note that the mitigation at the new location is not yet enabled. Corner case not handled ======================= Interrupts returning to kernel don't clear CPU buffers since the exit-to-user path is expected to do that anyways. But, there could be a case when an NMI is generated in kernel after the exit-to-user path has cleared the buffers. This case is not handled and an NMI returning to kernel doesn't clear CPU buffers because: 1. It is rare to get an NMI after VERW, but before returning to user. 2. For an unprivileged user, there is no known way to make that NMI less rare or target it. 3. It would take a large number of these precisely-timed NMIs to mount an actual attack. There's presumably not enough bandwidth. 4. The NMI in question occurs after a VERW, i.e. when user state is restored and most interesting data is already scrubbed. What's left is only the data that NMI touches, and that may or may not be of any interest. [ pawan: resolved conflict in syscall_return_via_sysret, added CLEAR_CPU_BUFFERS to USERGS_SYSRET64 ] Suggested-by: Dave Hansen <dave.hansen@intel.com> Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Link: https://lore.kernel.org/all/20240213-delay-verw-v8-2-a6216d83edb7%40linux.intel.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
172 lines
3.3 KiB
C
Executable file
172 lines
3.3 KiB
C
Executable file
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _X86_IRQFLAGS_H_
|
|
#define _X86_IRQFLAGS_H_
|
|
|
|
#include <asm/processor-flags.h>
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#include <asm/nospec-branch.h>
|
|
|
|
/* Provide __cpuidle; we can't safely include <linux/cpu.h> */
|
|
#define __cpuidle __section(".cpuidle.text")
|
|
|
|
/*
|
|
* Interrupt control:
|
|
*/
|
|
|
|
/* Declaration required for gcc < 4.9 to prevent -Werror=missing-prototypes */
extern inline unsigned long native_save_fl(void);

/*
 * Read the current EFLAGS register via pushf/pop.
 *
 * Returns the raw EFLAGS value; callers typically test X86_EFLAGS_IF
 * (see arch_irqs_disabled_flags()) to learn whether interrupts are on.
 */
extern __always_inline unsigned long native_save_fl(void)
{
	unsigned long flags;

	/*
	 * "=rm" is safe here, because "pop" adjusts the stack before
	 * it evaluates its effective address -- this is part of the
	 * documented behavior of the "pop" instruction.
	 */
	asm volatile("# __raw_save_flags\n\t"
		     "pushf ; pop %0"
		     : "=rm" (flags)
		     : /* no input */
		     : "memory");

	return flags;
}
|
|
|
|
/* Declaration prevents -Werror=missing-prototypes on gcc < 4.9 */
extern inline void native_restore_fl(unsigned long flags);

/*
 * Write @flags back into EFLAGS via push/popf; this may re-enable
 * interrupts if X86_EFLAGS_IF is set in @flags.
 *
 * "cc" is clobbered because popf rewrites the arithmetic flags;
 * "memory" acts as a compiler barrier around the flags change.
 */
extern inline void native_restore_fl(unsigned long flags)
{
	asm volatile("push %0 ; popf"
		     : /* no output */
		     :"g" (flags)
		     :"memory", "cc");
}
|
|
|
|
/* Disable maskable interrupts on the local CPU ("cli"). */
static __always_inline void native_irq_disable(void)
{
	/* "memory" clobber = compiler barrier: no reordering across cli. */
	asm volatile("cli": : :"memory");
}
|
|
|
|
/* Enable maskable interrupts on the local CPU ("sti"). */
static __always_inline void native_irq_enable(void)
{
	/* "memory" clobber = compiler barrier: no reordering across sti. */
	asm volatile("sti": : :"memory");
}
|
|
|
|
/*
 * Enable interrupts and halt until the next interrupt arrives.
 *
 * sti's one-instruction interrupt shadow means no interrupt can be
 * delivered between "sti" and "hlt", so a wakeup pending at this
 * point cannot be lost before the CPU actually halts.
 */
static inline __cpuidle void native_safe_halt(void)
{
	/* Scrub CPU buffers before going idle (MDS mitigation). */
	mds_idle_clear_cpu_buffers();
	asm volatile("sti; hlt": : :"memory");
}
|
|
|
|
/*
 * Halt the CPU without touching the interrupt flag; used when
 * interrupts are already enabled, or to shut the processor down.
 */
static inline __cpuidle void native_halt(void)
{
	/* Scrub CPU buffers before going idle (MDS mitigation). */
	mds_idle_clear_cpu_buffers();
	asm volatile("hlt": : :"memory");
}
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_PARAVIRT_XXL
|
|
#include <asm/paravirt.h>
|
|
#else
|
|
#ifndef __ASSEMBLY__
|
|
#include <linux/types.h>
|
|
|
|
/* Non-paravirt build: read EFLAGS directly from the hardware. */
static __always_inline unsigned long arch_local_save_flags(void)
{
	return native_save_fl();
}
|
|
|
|
/* Restore a previously saved EFLAGS value (may re-enable interrupts). */
static __always_inline void arch_local_irq_restore(unsigned long flags)
{
	native_restore_fl(flags);
}
|
|
|
|
/* Disable interrupts on the local CPU (cli). */
static __always_inline void arch_local_irq_disable(void)
{
	native_irq_disable();
}
|
|
|
|
/* Enable interrupts on the local CPU (sti). */
static __always_inline void arch_local_irq_enable(void)
{
	native_irq_enable();
}
|
|
|
|
/*
|
|
* Used in the idle loop; sti takes one instruction cycle
|
|
* to complete:
|
|
*/
|
|
static inline __cpuidle void arch_safe_halt(void)
{
	/* Race-free sti+hlt, plus MDS buffer clearing; see native_safe_halt(). */
	native_safe_halt();
}
|
|
|
|
/*
|
|
* Used when interrupts are already enabled or to
|
|
* shutdown the processor:
|
|
*/
|
|
static inline __cpuidle void halt(void)
{
	/* Plain hlt, EFLAGS.IF unchanged; see native_halt(). */
	native_halt();
}
|
|
|
|
/*
|
|
* For spinlocks, etc:
|
|
*/
|
|
/*
 * Atomically-usable save-and-disable: snapshot EFLAGS, then mask
 * interrupts.  The returned value is meant to be handed back to
 * arch_local_irq_restore() to undo the disable.
 */
static __always_inline unsigned long arch_local_irq_save(void)
{
	unsigned long saved_flags;

	/* Order matters: capture the current state before clearing IF. */
	saved_flags = arch_local_save_flags();
	arch_local_irq_disable();

	return saved_flags;
}
|
|
#else
|
|
|
|
/* Assembly-side counterparts of the C irq helpers above. */
#define ENABLE_INTERRUPTS(x)	sti
#define DISABLE_INTERRUPTS(x)	cli

#ifdef CONFIG_X86_64
#ifdef CONFIG_DEBUG_ENTRY
/* NOTE: clobbers %rax. */
#define SAVE_FLAGS(x)		pushfq; popq %rax
#endif

#define INTERRUPT_RETURN	jmp native_iret
/*
 * Return to 64-bit userspace via sysret.  CLEAR_CPU_BUFFERS (VERW)
 * runs after user register state is restored, to minimize the window
 * in which kernel data can linger in CPU buffers (MDS mitigation).
 */
#define USERGS_SYSRET64				\
	swapgs;					\
	CLEAR_CPU_BUFFERS;			\
	sysretq;
#define USERGS_SYSRET32				\
	swapgs;					\
	sysretl

#else
#define INTERRUPT_RETURN		iret
#endif
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
#endif /* CONFIG_PARAVIRT_XXL */
|
|
|
|
#ifndef __ASSEMBLY__
|
|
static __always_inline int arch_irqs_disabled_flags(unsigned long flags)
|
|
{
|
|
return !(flags & X86_EFLAGS_IF);
|
|
}
|
|
|
|
/* Are interrupts currently disabled on this CPU?  Samples live EFLAGS. */
static __always_inline int arch_irqs_disabled(void)
{
	return arch_irqs_disabled_flags(arch_local_save_flags());
}
|
|
#else
|
|
#ifdef CONFIG_X86_64
|
|
#ifdef CONFIG_XEN_PV
/* On Xen PV the swapgs is patched away (replaced with nothing) at runtime. */
#define SWAPGS	ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV
#else
#define SWAPGS	swapgs
#endif
|
|
#endif
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
#endif
|