From: Russ Anderson <rja@sgi.com>
Subject: mm: Avoid putting a bad page back on the LRU v8
References: 415829
Acked-by: schwab@suse.de

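Prevent a page with a physical memory error from being placed back
on the LRU.  A new page flag (PG_memerror) is added when
CONFIG_PAGEFLAGS_EXTENDED is defined; without it, PageMemError()
always returns 0.

This patch also exports migrate_prep, putback_lru_pages,
migrate_pages and isolate_lru_page.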

Changes in v8: removed the hot-path check for pages with memory
errors on the free list.

Signed-off-by: Russ Anderson <rja@sgi.com>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>

---
 include/linux/page-flags.h |   16 +++++++++++++++-
 mm/migrate.c               |   35 ++++++++++++++++++++++++++++++++++-
 mm/vmscan.c                |    1 +
 3 files changed, 50 insertions(+), 2 deletions(-)

--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -85,6 +85,7 @@ enum pageflags {
 	PG_private_2,		/* If pagecache, has fs aux data */
 	PG_writeback,		/* Page is under writeback */
 #ifdef CONFIG_PAGEFLAGS_EXTENDED
+	PG_memerror,		/* Page has a physical memory error */
 	PG_head,		/* A head page */
 	PG_tail,		/* A tail page */
 #else
@@ -166,14 +167,21 @@ static inline int TestClearPage##uname(s
 static inline int __TestClearPage##uname(struct page *page)		\
 		{ return __test_and_clear_bit(PG_##lname, &page->flags); }
 
+#define PAGEFLAGMASK(uname, lname)					\
+static inline int PAGEMASK_##uname(void)				\
+		{ return (1 << PG_##lname); }
+
 #define PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname)		\
-	SETPAGEFLAG(uname, lname) CLEARPAGEFLAG(uname, lname)
+	SETPAGEFLAG(uname, lname) CLEARPAGEFLAG(uname, lname)		\
+	PAGEFLAGMASK(uname, lname)
 
 #define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname)		\
 	__SETPAGEFLAG(uname, lname)  __CLEARPAGEFLAG(uname, lname)
 
 #define PAGEFLAG_FALSE(uname) 						\
 static inline int Page##uname(struct page *page) 			\
+			{ return 0; }					\
+static inline int PAGEMASK_##uname(void)				\
 			{ return 0; }
 
 #define TESTSCFLAG(uname, lname)					\
@@ -404,6 +412,12 @@ static inline void ClearPageCompound(str
 
 #endif /* !PAGEFLAGS_EXTENDED */
 
+#ifdef CONFIG_PAGEFLAGS_EXTENDED
+PAGEFLAG(MemError, memerror)
+#else
+PAGEFLAG_FALSE(MemError)
+#endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /*
  * PageHuge() only returns true for hugetlbfs pages, but not for
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -66,6 +66,7 @@ int migrate_prep_local(void)
 
 	return 0;
 }
+EXPORT_SYMBOL(migrate_prep);
 
 /*
  * Add isolated pages on the list back to the LRU under page lock
@@ -83,6 +84,7 @@ void putback_lru_pages(struct list_head
 		putback_lru_page(page);
 	}
 }
+EXPORT_SYMBOL(putback_lru_pages);
 
 /*
  * Restore a potential migration pte to a working pte entry
@@ -864,6 +866,25 @@ out:
 		 * restored.
 		 */
 		list_del(&page->lru);
+		if (PageMemError(page)) {
+			if (rc == 0)
+				/*
+				 * A page with a memory error that has
+				 * been migrated will not be moved to
+				 * the LRU.
+				 */
+				goto move_newpage;
+			else
+				/*
+				 * The page failed to migrate and will not
+				 * be added to the bad page list.  Clearing
+				 * the error bit will allow another attempt
+				 * to migrate if it gets another correctable
+				 * error.
+				 */
+				ClearPageMemError(page);
+		}
+
 		dec_zone_page_state(page, NR_ISOLATED_ANON +
 				page_is_file_cache(page));
 		putback_lru_page(page);
@@ -978,7 +999,7 @@ int migrate_pages(struct list_head *from
 	struct page *page;
 	struct page *page2;
 	int swapwrite = current->flags & PF_SWAPWRITE;
-	int rc;
+	int rc = 0;
 
 	if (!swapwrite)
 		current->flags |= PF_SWAPWRITE;
@@ -1008,6 +1029,17 @@ int migrate_pages(struct list_head *from
 			}
 		}
 	}
+
+	if (rc != 0)
+		list_for_each_entry_safe(page, page2, from, lru)
+			if (PageMemError(page))
+				/*
+				 * The page failed to migrate.  Clearing
+				 * the error bit will allow another attempt
+				 * to migrate if it gets another correctable
+				 * error.
+				 */
+				ClearPageMemError(page);
 	rc = 0;
 out:
 	if (!swapwrite)
@@ -1062,6 +1094,7 @@ out:
 
 	return nr_failed + retry;
 }
+EXPORT_SYMBOL(migrate_pages);
 
 #ifdef CONFIG_NUMA
 /*
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1337,6 +1337,7 @@ int isolate_lru_page(struct page *page)
 	}
 	return ret;
 }
+EXPORT_SYMBOL(isolate_lru_page);
 
 /*
  * Are there way too many processes in the direct reclaim path already?
