From: NeilBrown <neilb@suse.de>
Subject: Correctly handle read failure from last working device in RAID10
Patch-mainline: 3.3
References: bnc#746717

When the last working device in a RAID10 reports a read error we don't
want to fail the device (it might just be a bad block) but rather
want to return an error to user-space.
But we don't - we retry indefinitely.

Fix this in two places:
1/ don't even try the retry if it obviously won't work
   (in raid10_end_read_request).  Sometimes it won't work,
   but that isn't obvious so this change isn't quite sufficient.

2/ When choosing the device to retry on, don't choose one that we
   recently tried to mark as faulty.  That would be dumb :-)

Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Neil Brown <neilb@suse.de>
Acked-by: Hannes Reinecke <hare@suse.de>
---
 drivers/md/raid10.c |   20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

--- linux-3.0-SLE11-SP2-BTMU.orig/drivers/md/raid10.c
+++ linux-3.0-SLE11-SP2-BTMU/drivers/md/raid10.c
@@ -59,6 +59,7 @@
 
 static void allow_barrier(conf_t *conf);
 static void lower_barrier(conf_t *conf);
+static int enough(conf_t *conf, int ignore);
 
 static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
@@ -270,6 +271,19 @@ static void raid10_end_read_request(stru
 		 * wait for the 'master' bio.
 		 */
 		set_bit(R10BIO_Uptodate, &r10_bio->state);
+	} else {
+		/* If all other devices that store this block have
+		 * failed, we want to return the error upwards rather
+		 * than fail the last device.  Here we redefine
+		 * "uptodate" to mean "Don't want to retry"
+		 */
+		unsigned long flags;
+		spin_lock_irqsave(&conf->device_lock, flags);
+		if (!enough(conf, dev))
+			uptodate = 1;
+		spin_unlock_irqrestore(&conf->device_lock, flags);
+	}
+	if (uptodate) {
 		raid_end_bio_io(r10_bio);
 		rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
 	} else {
@@ -1507,6 +1521,7 @@ static void fix_read_error(conf_t *conf,
 		       "md/raid10:%s: %s: Failing raid device\n",
 		       mdname(mddev), b);
 		md_error(mddev, conf->mirrors[d].rdev);
+		r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
 		return;
 	}
 
@@ -1547,6 +1562,7 @@ static void fix_read_error(conf_t *conf,
 			/* Cannot read from anywhere -- bye bye array */
 			int dn = r10_bio->devs[r10_bio->read_slot].devnum;
 			md_error(mddev, conf->mirrors[dn].rdev);
+			r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
 			break;
 		}
 
@@ -1764,8 +1780,10 @@ static void raid10d(mddev_t *mddev)
 				freeze_array(conf);
 				fix_read_error(conf, mddev, r10_bio);
 				unfreeze_array(conf);
-			} else
+			} else {
 				md_error(mddev, rdev);
+				r10_bio->devs[slot].bio = IO_BLOCKED;
+			}
 
 			rdev_dec_pending(rdev, mddev);
 
