From: Takashi Iwai <tiwai@suse.de>
Subject: [PATCH] x86: kexec: Temporary hack to allow crashkernel=512M on server hardware (v2)
Patch-mainline: Never.  A right fix is needed instead.
References: bnc#726850

crashkernel= was partially broken with the introduction of memblock
where the size of the crashkernel was limited to 448M. This was "fixed"
by commit [4b239f458: x86-64, mm: Put early page table high] which in
turn broke everything from Xen to suspend to some AMD machines.

The suspend problem was still unresolved for 3.0.8 so commit [8548c84d:
X86: Fix S4 regression] partially reverted it. This leaves us in
a position where we can either have large crash kernels or fully
working suspend.

This patch provides a partial workaround for a large crashkernel by
checking the crashkernel boot options and switching to the behavior of
higher early pagetables if such an option is given.  If no crashkernel
option or a smaller size is given, it falls back to the normal way so
that S4 works.  When the switching happens, the kernel logs a message
and warns that S4 might be broken.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

---
 arch/x86/mm/init.c |   34 ++++++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -3,6 +3,9 @@
 #include <linux/ioport.h>
 #include <linux/swap.h>
 #include <linux/memblock.h>
+/* for bnc#726850 */
+#include <linux/bootmem.h>
+#include <linux/kexec.h>
 
 #include <asm/cacheflush.h>
 #include <asm/e820.h>
@@ -28,6 +31,27 @@ int direct_gbpages
 #endif
 ;
 
+/* Is the hack for a large crashkernel (>= 448M) needed? (bnc#726850) */
+static bool __init need_crashkernel_hack(void)
+{
+#if defined(CONFIG_X86_64) && defined(CONFIG_KEXEC)
+	unsigned long long total_mem;
+	unsigned long long crash_size, crash_base;
+	total_mem = max_pfn - min_low_pfn;
+	total_mem <<= PAGE_SHIFT;
+	if (!parse_crashkernel(boot_command_line, total_mem,
+			       &crash_size, &crash_base) &&
+	    /* 448MB = 512MB - 64MB */
+	    crash_size >= 448 * 1024 * 1024) {
+		pr_info_once("Relocating page tables to higher address to "
+			     "make space for crashkernel memory; "
+			     "S4 might be broken\n");
+		return true;
+	}
+#endif
+	return false;
+}
+
 struct map_range {
 	unsigned long start;
 	unsigned long end;
@@ -39,11 +63,12 @@ struct map_range {
  * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB
  * pages. Then find enough contiguous space for those page tables.
  */
-static void __init find_early_table_space(struct map_range *mr, int nr_range)
+static void __init find_early_table_space(unsigned long end,
+		struct map_range *mr, int nr_range)
 {
 	int i;
 	unsigned long puds = 0, pmds = 0, ptes = 0, tables;
-	unsigned long start = 0, good_end;
+	unsigned long start = 0, good_end = end;
 	unsigned long pgd_extra = 0;
 	phys_addr_t base;
 
@@ -83,7 +108,8 @@ static void __init find_early_table_spac
 	/* for fixmap */
 	tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
 #endif
-	good_end = max_pfn_mapped << PAGE_SHIFT;
+	if (!need_crashkernel_hack())
+		good_end = max_pfn_mapped << PAGE_SHIFT;
 
 	base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
 	if (base == MEMBLOCK_ERROR)
@@ -274,7 +300,7 @@ unsigned long __init_refok init_memory_m
 	 * nodes are discovered.
 	 */
 	if (!after_bootmem)
-		find_early_table_space(mr, nr_range);
+		find_early_table_space(end, mr, nr_range);
 
 	for (i = 0; i < nr_range; i++)
 		ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
