From: Mike Travis <travis@sgi.com>
Subject: UV: Use NMI Backend
References: bnc#744655
Patch-mainline: Submitted 2 Feb 2012

To support the UV NMI in SLES11 SP2, we need to attach to the DIE_NMI
notifier chain, because the DIE_NMIUNKNOWN event happens too late and
sometimes not all CPUs respond to the BMC NMI signal.  We also need to
return NOTIFY_STOP to tell the notifier call chain caller that we have
handled the event and that it can stop.  This avoids the "Dazed and
Confused" message.

Note that this patch does not include the NMI perf handling
optimizations; those will be in a separate patch.

Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Jeff Mahoney <jeffm@suse.com>
---
 arch/x86/kernel/apic/x2apic_uv_x.c |   52 +++++++++++++++++++++++++++----------
 1 file changed, 39 insertions(+), 13 deletions(-)

--- linux-3.0.orig/arch/x86/kernel/apic/x2apic_uv_x.c
+++ linux-3.0/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -25,6 +25,7 @@
 #include <linux/kdebug.h>
 #include <linux/delay.h>
 #include <linux/crash_dump.h>
+#include <linux/lkdb.h>
 
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
@@ -672,14 +673,19 @@ void __cpuinit uv_cpu_init(void)
 }
 
 /*
- * When NMI is received, print a stack trace.
+ * When an NMI from the BMC is received:
+ * 	- call KDB if active
+ * 	- print a stack trace if kdb is not active.
  */
 int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 {
+ 	struct die_args *args = data;
+ 	struct pt_regs *regs = args->regs;
+ 	static int controlling_cpu = -1;
 	unsigned long real_uv_nmi;
-	int bid;
+	int bid, handled = 0;
 
-	if (reason != DIE_NMIUNKNOWN)
+	if (reason != DIE_NMIUNKNOWN && reason != DIE_NMI)
 		return NOTIFY_OK;
 
 	if (in_crash_kexec)
@@ -695,18 +701,38 @@ int uv_handle_nmi(struct notifier_block
 	bid = uv_numa_blade_id();
 	real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
 
-	if (unlikely(real_uv_nmi)) {
-		spin_lock(&uv_blade_info[bid].nmi_lock);
-		real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
-		if (real_uv_nmi) {
-			uv_blade_info[bid].nmi_count++;
-			uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
+	if (likely(!real_uv_nmi))
+		return NOTIFY_OK;
+
+#ifdef CONFIG_KDB
+	if (kdb_on) {
+		spin_lock(&uv_nmi_lock);
+		if (controlling_cpu == -1) {
+			controlling_cpu = smp_processor_id();
+			spin_unlock(&uv_nmi_lock);
+			(void)kdb(LKDB_REASON_NMI, reason, regs);
+			controlling_cpu = -1;
+		} else {
+			spin_unlock(&uv_nmi_lock);
+			(void)kdb(LKDB_REASON_ENTER_SLAVE, reason, regs);
+			while (controlling_cpu != -1)
+				cpu_relax();
 		}
-		spin_unlock(&uv_blade_info[bid].nmi_lock);
-	}
+		handled = 1;
+ 	}
+#endif
+
+	spin_lock(&uv_blade_info[bid].nmi_lock);
+	uv_blade_info[bid].nmi_count++;
+	uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
+	spin_unlock(&uv_blade_info[bid].nmi_lock);
+
+	if (likely(__get_cpu_var(cpu_last_nmi_count) ==
+						uv_blade_info[bid].nmi_count))
+		return NOTIFY_STOP;
 
-	if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
-		return NOTIFY_DONE;
+	if (handled)
+		return NOTIFY_STOP;
 
 	__get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
 

