# Commit d0c75dc4c028bc32fd9cf1d4358973929548de54
# Date 2025-12-01 16:20:41 +0000
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
x86/amd: Fix race editing DE_CFG

We have two different functions explaining that DE_CFG is Core-scoped and that
writes are racy but happen to be safe.  This is only true when there's one of
them.

Introduce amd_init_de_cfg() to be the singular function which writes to
DE_CFG, modelled after the logic we already have for BP_CFG.

While reworking amd_check_zenbleed() into a simple predicate used by
amd_init_de_cfg(), fix the microcode table.  The 'good_rev' was specific to an
individual stepping and not valid to be matched by model, let alone a range.
The only CPUs incorrectly matched that I can locate appear to be
pre-production, and probably didn't get Zenbleed microcode.

Rework amd_init_lfence() to be amd_init_lfence_dispatch() with only the
purpose of configuring X86_FEATURE_LFENCE_DISPATCH in the case that it needs
synthesising.  Run it on the BSP only and use setup_force_cpu_cap() to prevent
the bit disappearing on a subseuqent CPUID rescan.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -681,45 +681,6 @@ void cf_check early_init_amd(struct cpui
 	ctxt_switch_levelling(NULL);
 }
 
-void amd_init_lfence(struct cpuinfo_x86 *c)
-{
-	uint64_t value;
-
-	/*
-	 * Some hardware has LFENCE dispatch serialising always enabled,
-	 * nothing to do on that case.
-	 */
-	if (test_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability))
-		return;
-
-	/*
-	 * Attempt to set lfence to be Dispatch Serialising.  This MSR almost
-	 * certainly isn't virtualised (and Xen at least will leak the real
-	 * value in but silently discard writes), as well as being per-core
-	 * rather than per-thread, so do a full safe read/write/readback cycle
-	 * in the worst case.
-	 */
-	if (rdmsr_safe(MSR_AMD64_DE_CFG, value))
-		/* Unable to read.  Assume the safer default. */
-		__clear_bit(X86_FEATURE_LFENCE_DISPATCH,
-			    c->x86_capability);
-	else if (value & AMD64_DE_CFG_LFENCE_SERIALISE)
-		/* Already dispatch serialising. */
-		__set_bit(X86_FEATURE_LFENCE_DISPATCH,
-			  c->x86_capability);
-	else if (wrmsr_safe(MSR_AMD64_DE_CFG,
-			    value | AMD64_DE_CFG_LFENCE_SERIALISE) ||
-		 rdmsr_safe(MSR_AMD64_DE_CFG, value) ||
-		 !(value & AMD64_DE_CFG_LFENCE_SERIALISE))
-		/* Attempt to set failed.  Assume the safer default. */
-		__clear_bit(X86_FEATURE_LFENCE_DISPATCH,
-			    c->x86_capability);
-	else
-		/* Successfully enabled! */
-		__set_bit(X86_FEATURE_LFENCE_DISPATCH,
-			  c->x86_capability);
-}
-
 /*
  * Refer to the AMD Speculative Store Bypass whitepaper:
  * https://developer.amd.com/wp-content/resources/124441_AMD64_SpeculativeStoreBypassDisable_Whitepaper_final.pdf
@@ -913,76 +874,6 @@ void __init detect_zen2_null_seg_behavio
 
 }
 
-static void amd_check_zenbleed(void)
-{
-	const struct cpu_signature *sig = &this_cpu(cpu_sig);
-	unsigned int good_rev;
-	uint64_t val, old_val, chickenbit = (1 << 9);
-
-	/*
-	 * If we're virtualised, we can't do family/model checks safely, and
-	 * we likely wouldn't have access to DE_CFG even if we could see a
-	 * microcode revision.
-	 *
-	 * A hypervisor may hide AVX as a stopgap mitigation.  We're not in a
-	 * position to care either way.  An admin doesn't want to be disabling
-	 * AVX as a mitigation on any build of Xen with this logic present.
-	 */
-	if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17)
-		return;
-
-	switch (boot_cpu_data.x86_model) {
-	case 0x30 ... 0x3f: good_rev = 0x0830107a; break;
-	case 0x60 ... 0x67: good_rev = 0x0860010b; break;
-	case 0x68 ... 0x6f: good_rev = 0x08608105; break;
-	case 0x70 ... 0x7f: good_rev = 0x08701032; break;
-	case 0xa0 ... 0xaf: good_rev = 0x08a00008; break;
-	default:
-		/*
-		 * With the Fam17h check above, most parts getting here are
-		 * Zen1.  They're not affected.  Assume Zen2 ones making it
-		 * here are affected regardless of microcode version.
-		 */
-		if (is_zen1_uarch())
-			return;
-		good_rev = ~0U;
-		break;
-	}
-
-	rdmsrl(MSR_AMD64_DE_CFG, val);
-	old_val = val;
-
-	/*
-	 * Microcode is the preferred mitigation, in terms of performance.
-	 * However, without microcode, this chickenbit (specific to the Zen2
-	 * uarch) disables Floating Point Mov-Elimination to mitigate the
-	 * issue.
-	 */
-	val &= ~chickenbit;
-	if (sig->rev < good_rev)
-		val |= chickenbit;
-
-	if (val == old_val)
-		/* Nothing to change. */
-		return;
-
-	/*
-	 * DE_CFG is a Core-scoped MSR, and this write is racy during late
-	 * microcode load.  However, both threads calculate the new value from
-	 * state which is shared, and unrelated to the old value, so the
-	 * result should be consistent.
-	 */
-	wrmsrl(MSR_AMD64_DE_CFG, val);
-
-	/*
-	 * Inform the admin that we changed something, but don't spam,
-	 * especially during a late microcode load.
-	 */
-	if (smp_processor_id() == 0)
-		printk(XENLOG_INFO "Zenbleed mitigation - using %s\n",
-		       val & chickenbit ? "chickenbit" : "microcode");
-}
-
 static void cf_check fam17_disable_c6(void *arg)
 {
 	/* Disable C6 by clearing the CCR{0,1,2}_CC6EN bits. */
@@ -1009,6 +900,109 @@ static void cf_check fam17_disable_c6(vo
 	wrmsrl(MSR_AMD_CSTATE_CFG, val & mask);
 }
 
+static bool zenbleed_use_chickenbit(void)
+{
+    unsigned int curr_rev;
+    uint8_t fixed_rev;
+
+    /* Zenbleed only affects Zen2.  Nothing to do on non-Fam17h systems. */
+    if ( boot_cpu_data.x86 != 0x17 )
+        return false;
+
+    curr_rev = this_cpu(cpu_sig).rev;
+    switch ( curr_rev >> 8 )
+    {
+    case 0x083010: fixed_rev = 0x7a; break;
+    case 0x086001: fixed_rev = 0x0b; break;
+    case 0x086081: fixed_rev = 0x05; break;
+    case 0x087010: fixed_rev = 0x32; break;
+    case 0x08a000: fixed_rev = 0x08; break;
+    default:
+        /*
+         * With the Fam17h check above, most parts getting here are Zen1.
+         * They're not affected.  Assume Zen2 ones making it here are affected
+         * regardless of microcode version.
+         */
+        return is_zen2_uarch();
+    }
+
+    return (uint8_t)curr_rev >= fixed_rev;
+}
+
+void amd_init_de_cfg(const struct cpuinfo_x86 *c)
+{
+    uint64_t val, new = 0;
+
+    /*
+     * The MSR doesn't exist on Fam 0xf/0x11.  If virtualised, we won't have
+     * mutable access even if we can read it.
+     */
+    if ( c->x86 == 0xf || c->x86 == 0x11 || cpu_has_hypervisor )
+        return;
+
+    /*
+     * On Zen3 (Fam 0x19) and later CPUs, LFENCE is unconditionally dispatch
+     * serialising, and is enumerated in CPUID.
+     *
+     * On older systems, LFENCE is unconditionally dispatch serialising (when
+     * the MSR doesn't exist), or can be made so by setting this bit.
+     */
+    if ( !test_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability) )
+        new |= AMD64_DE_CFG_LFENCE_SERIALISE;
+
+    /*
+     * If vulnerable to Zenbleed and not mitigated in microcode, use the
+     * bigger hammer.
+     */
+    if ( zenbleed_use_chickenbit() )
+        new |= (1 << 9);
+
+    /* Avoid reading DE_CFG if we don't intend to change anything. */
+    if ( !new )
+        return;
+
+    rdmsrl(MSR_AMD64_DE_CFG, val);
+
+    if ( (val & new) == new )
+        return;
+
+    /*
+     * DE_CFG is a Core-scoped MSR, and this write is racy.  However, both
+     * threads calculate the new value from state which expected to be
+     * consistent across CPUs and unrelated to the old value, so the result
+     * should be consistent.
+     */
+    wrmsrl(MSR_AMD64_DE_CFG, val | new);
+}
+
+void __init amd_init_lfence_dispatch(void)
+{
+    struct cpuinfo_x86 *c = &boot_cpu_data;
+    uint64_t val;
+
+    if ( test_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability) )
+        /* LFENCE is forced dispatch serialising and we can't control it. */
+        return;
+
+    if ( c->x86 == 0xf || c->x86 == 0x11 )
+        /* MSR doesn't exist, LFENCE is dispatch serialising. */
+        goto set;
+
+    if ( rdmsr_safe(MSR_AMD64_DE_CFG, val) )
+        /* Unable to read.  Assume the safer default. */
+        goto clear;
+
+    if ( val & AMD64_DE_CFG_LFENCE_SERIALISE )
+        goto set;
+
+ clear:
+    setup_clear_cpu_cap(X86_FEATURE_LFENCE_DISPATCH);
+    return;
+
+ set:
+    setup_force_cpu_cap(X86_FEATURE_LFENCE_DISPATCH);
+}
+
 static void amd_check_bp_cfg(void)
 {
 	uint64_t val, new = 0;
@@ -1053,6 +1047,11 @@ static void cf_check init_amd(struct cpu
 
 	unsigned long long value;
 
+	amd_init_de_cfg(c);
+
+	if (c == &boot_cpu_data)
+		amd_init_lfence_dispatch(); /* Needs amd_init_de_cfg() */
+
 	/* Disable TLB flush filter by setting HWCR.FFDIS on K8
 	 * bit 6 of msr C001_0015
 	 *
@@ -1091,12 +1090,6 @@ static void cf_check init_amd(struct cpu
 	if (c == &boot_cpu_data && !cpu_has(c, X86_FEATURE_RSTR_FP_ERR_PTRS))
 		setup_force_cpu_cap(X86_BUG_FPU_PTRS);
 
-	if (c->x86 == 0x0f || c->x86 == 0x11)
-		/* Always dispatch serialising on this hardare. */
-		__set_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability);
-	else /* Implicily "== 0x10 || >= 0x12" by being 64bit. */
-		amd_init_lfence(c);
-
 	amd_init_ssbd(c);
 
 	if (c->x86 == 0x17)
@@ -1313,7 +1306,6 @@ static void cf_check init_amd(struct cpu
 	if ((smp_processor_id() == 1) && !cpu_has(c, X86_FEATURE_ITSC))
 		disable_c1_ramping();
 
-	amd_check_zenbleed();
 	amd_check_bp_cfg();
 
 	if (fam17_c6_disabled)
--- a/xen/arch/x86/cpu/cpu.h
+++ b/xen/arch/x86/cpu/cpu.h
@@ -20,7 +20,8 @@ extern bool detect_extended_topology(str
 
 void cf_check early_init_amd(struct cpuinfo_x86 *c);
 void amd_log_freq(const struct cpuinfo_x86 *c);
-void amd_init_lfence(struct cpuinfo_x86 *c);
+void amd_init_de_cfg(const struct cpuinfo_x86 *c);
+void amd_init_lfence_dispatch(void);
 void amd_init_ssbd(const struct cpuinfo_x86 *c);
 void amd_init_spectral_chicken(void);
 void detect_zen2_null_seg_behaviour(void);
--- a/xen/arch/x86/cpu/hygon.c
+++ b/xen/arch/x86/cpu/hygon.c
@@ -31,7 +31,11 @@ static void cf_check init_hygon(struct c
 {
 	unsigned long long value;
 
-	amd_init_lfence(c);
+	amd_init_de_cfg(c);
+
+	if (c == &boot_cpu_data)
+		amd_init_lfence_dispatch(); /* Needs amd_init_de_cfg() */
+
 	amd_init_ssbd(c);
 
 	/* Probe for NSCB on Zen2 CPUs when not virtualised */
