Git-commit: d70ed2e4fafdbef0800e73942482bb075c21578b
From: Andrei Warkentin <andreiw@vmware.com>
Date: Tue, 18 Oct 2011 12:16:48 +1100
Subject: [PATCH] MD: Allow restarting an interrupted incremental recovery.
Patch-mainline: 3.2
References: bnc#771065

If an incremental recovery was interrupted, a subsequent
re-add will result in a full recovery, even though an
incremental should be possible (seen with raid1).

Solve this problem by not updating the superblock on the
recovering device until array is not degraded any longer.

Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Andrei Warkentin <andreiw@vmware.com>
Signed-off-by: NeilBrown <neilb@suse.de>

---
 drivers/md/md.c |   34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

--- linux-3.0-SLE11-SP3.orig/drivers/md/md.c
+++ linux-3.0-SLE11-SP3/drivers/md/md.c
@@ -2486,24 +2486,26 @@ rewrite:
 	bitmap_update_sb(mddev->bitmap);
 	list_for_each_entry(rdev, &mddev->disks, same_set) {
 		char b[BDEVNAME_SIZE];
-		dprintk(KERN_INFO "md: ");
+
 		if (rdev->sb_loaded != 1)
 			continue; /* no noise on spare devices */
-		if (test_bit(Faulty, &rdev->flags))
-			dprintk("(skipping faulty ");
-
-		dprintk("%s ", bdevname(rdev->bdev,b));
-		if (!test_bit(Faulty, &rdev->flags)) {
+		if (!test_bit(Faulty, &rdev->flags) &&
+		    rdev->saved_raid_disk == -1) {
 			md_super_write(mddev,rdev,
 				       rdev->sb_start, rdev->sb_size,
 				       rdev->sb_page);
-			dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
+			dprintk(KERN_INFO "md: (write) %s's sb offset: %llu\n",
 				bdevname(rdev->bdev,b),
 				(unsigned long long)rdev->sb_start);
 			rdev->sb_events = mddev->events;
 
-		} else
-			dprintk(")\n");
+		} else if (test_bit(Faulty, &rdev->flags))
+			dprintk("md: %s (skipping faulty)\n",
+				bdevname(rdev->bdev, b));
+		else
+			dprintk("md: %s (skipping incremental s/r)\n",
+				bdevname(rdev->bdev, b));
+
 		if (mddev->level == LEVEL_MULTIPATH)
 			/* only need to write one superblock... */
 			break;
@@ -7593,15 +7595,19 @@ static void reap_sync_thread(mddev_t *md
 	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 	    mddev->pers->finish_reshape)
 		mddev->pers->finish_reshape(mddev);
-	md_update_sb(mddev, 1);
 
-	/* if array is no-longer degraded, then any saved_raid_disk
-	 * information must be scrapped
+	/* If array is no-longer degraded, then any saved_raid_disk
+	 * information must be scrapped.  Also if any device is now
+	 * In_sync we must scrape the saved_raid_disk for that device
+	 * do the superblock for an incrementally recovered device
+	 * written out.
 	 */
-	if (!mddev->degraded)
-		list_for_each_entry(rdev, &mddev->disks, same_set)
+	list_for_each_entry(rdev, &mddev->disks, same_set)
+		if (!mddev->degraded ||
+		    test_bit(In_sync, &rdev->flags))
 			rdev->saved_raid_disk = -1;
 
+	md_update_sb(mddev, 1);
 	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
 	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
 	clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
