From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: [PATCH] x86/spec-ctrl: Remove conditional IRQs-on-ness for INT $0x80/0x82 paths

Before speculation defences, some paths in Xen could genuinely get away with
being IRQs-on at entry.  But XPTI invalidated this property on most paths, and
attempting to maintain it on the remaining paths was a mistake.

Fast forward, and DO_SPEC_CTRL_COND_IBPB (protection for AMD BTC/SRSO) is not
IRQ-safe, running with IRQs enabled in some cases.  The other actions taken on
these paths happen to be IRQ-safe.

Make entry_int82() and int80_direct_trap() unconditionally Interrupt Gates
rather than Trap Gates.  Remove the conditional re-adjustment of
int80_direct_trap() in smp_prepare_cpus(), and have entry_int82() explicitly
enable interrupts when safe to do so.

In smp_prepare_cpus(), with the conditional re-adjustment removed, the
clearing of pv_cr3 is the only remaining action gated on XPTI, and it is out
of place anyway, repeating work already done by smp_prepare_boot_cpu().  Drop
the entire if() condition to avoid leaving an incorrect vestigial remnant.

Also drop comments which make incorrect statements about when its safe to
enable interrupts.

This is XSA-446 / CVE-2023-46836

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -1081,17 +1081,6 @@ void __init smp_prepare_cpus(unsigned in
     rc = setup_cpu_root_pgt(0);
     if ( rc )
         panic("Error %d setting up PV root page table\n", rc);
-    if ( per_cpu(root_pgt, 0) )
-    {
-        get_cpu_info()->pv_cr3 = 0;
-
-        /*
-         * All entry points which may need to switch page tables have to start
-         * with interrupts off. Re-write what pv_trap_init() has put there.
-         */
-        _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, SYS_DESC_irq_gate, 3,
-                  &int80_direct_trap);
-    }
 
     set_nr_sockets();
 
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -4073,10 +4073,11 @@ void __init trap_init(void)
 
     /* The 32-on-64 hypercall vector is only accessible from ring 1. */
     _set_gate(idt_table + HYPERCALL_VECTOR,
-              SYS_DESC_trap_gate, 1, entry_int82);
+              SYS_DESC_irq_gate, 1, entry_int82);
 
     /* Fast trap for int80 (faster than taking the #GP-fixup path). */
-    _set_gate(idt_table + 0x80, SYS_DESC_trap_gate, 3, &int80_direct_trap);
+    _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, SYS_DESC_irq_gate, 3,
+              &int80_direct_trap);
 
     for ( vector = 0; vector < NR_VECTORS; ++vector )
     {
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -20,6 +20,8 @@ ENTRY(entry_int82)
         SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */
         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
 
+        sti
+
         CR4_PV32_RESTORE
 
         GET_CURRENT(bx)
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -302,7 +302,6 @@ ENTRY(lstar_enter)
         jmp   test_all_events
 
 ENTRY(sysenter_entry)
-        /* sti could live here when we don't switch page tables below. */
         pushq $FLAT_USER_SS
         pushq $0
         pushfq
