From: jbeulich@suse.com
Subject: blkfront: allow using indirect request segment descriptors
Patch-mainline: n/a
References: fate#316876

--- a/drivers/xen/blkfront/blkfront.c
+++ b/drivers/xen/blkfront/blkfront.c
@@ -41,9 +41,11 @@
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/log2.h>
+#include <linux/moduleparam.h>
 #include <linux/scatterlist.h>
 #include <linux/vmalloc.h>
 #include <scsi/scsi.h>
+#include <xen/blkif.h>
 #include <xen/evtchn.h>
 #include <xen/xenbus.h>
 #include <xen/interface/grant_table.h>
@@ -59,6 +61,7 @@
 struct blk_resume_entry {
 	struct list_head list;
 	struct blk_shadow copy;
+	struct blkif_request_segment *indirect_segs;
 };
 
 #define BLKIF_STATE_DISCONNECTED 0
@@ -70,7 +73,8 @@ static void connect(struct blkfront_info
 static void blkfront_closing(struct blkfront_info *);
 static int blkfront_remove(struct xenbus_device *);
 static int talk_to_backend(struct xenbus_device *, struct blkfront_info *);
-static int setup_blkring(struct xenbus_device *, struct blkfront_info *);
+static int setup_blkring(struct xenbus_device *, struct blkfront_info *,
+			 unsigned int old_ring_size);
 
 static void kick_pending_request_queues(struct blkfront_info *);
 
@@ -78,9 +82,14 @@ static irqreturn_t blkif_int(int irq, vo
 static void blkif_restart_queue(struct work_struct *arg);
 static int blkif_recover(struct blkfront_info *, unsigned int old_ring_size,
 			 unsigned int new_ring_size);
-static void blkif_completion(struct blk_shadow *);
+static bool blkif_completion(struct blkfront_info *, unsigned long id,
+			     int status);
 static void blkif_free(struct blkfront_info *, int);
 
+/* Maximum number of indirect segments to be used by the front end. */
+static unsigned int max_segs_per_req = BITS_PER_LONG;
+module_param_named(max_indirect_segments, max_segs_per_req, uint, 0644);
+MODULE_PARM_DESC(max_indirect_segments, "maximum number of indirect segments");
 
 /**
  * Entry point to this code when a new device is created.  Allocate the basic
@@ -121,6 +130,7 @@ static int blkfront_probe(struct xenbus_
 	info->connected = BLKIF_STATE_DISCONNECTED;
 	INIT_WORK(&info->work, blkif_restart_queue);
 	INIT_LIST_HEAD(&info->resume_list);
+	INIT_LIST_HEAD(&info->resume_split);
 
 	/* Front end dir is a number, which is used as the id. */
 	info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
@@ -185,7 +195,7 @@ static void shadow_init(struct blk_shado
 static int talk_to_backend(struct xenbus_device *dev,
 			   struct blkfront_info *info)
 {
-	unsigned int ring_size, ring_order;
+	unsigned int ring_size, ring_order, max_segs;
 	unsigned int old_ring_size = RING_SIZE(&info->ring);
 	const char *what = NULL;
 	struct xenbus_transaction xbt;
@@ -222,8 +232,25 @@ static int talk_to_backend(struct xenbus
 	 */
 	info->ring_size = ring_size = 1U << ring_order;
 
+	/*
+	 * Query the backend's indirect segment limit.  max_segs has no
+	 * initial value, so it is only examined once xenbus_scanf() has
+	 * reported exactly one parsed item.
+	 */
+	err = xenbus_scanf(XBT_NIL, dev->otherend,
+			   "feature-max-indirect-segments", "%u", &max_segs);
+	if (err == 1 && max_segs > max_segs_per_req)
+		max_segs = max_segs_per_req;
+	if (err != 1 || max_segs < BLKIF_MAX_SEGMENTS_PER_REQUEST)
+		max_segs = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	else if (BLKIF_INDIRECT_PAGES(max_segs)
+		 > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST)
+		max_segs = BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST
+			   * BLKIF_SEGS_PER_INDIRECT_FRAME;
+	info->max_segs_per_req = max_segs;
+
 	/* Create shared ring, alloc event channel. */
-	err = setup_blkring(dev, info);
+	err = setup_blkring(dev, info, old_ring_size);
 	if (err)
 		goto out;
 
@@ -295,8 +317,8 @@ again:
 		break;
 	}
 
-	pr_info("blkfront: %s: ring-pages=%u nr-ents=%u\n",
-		dev->nodename, info->ring_size, ring_size);
+	pr_info("blkfront: %s: ring-pages=%u nr-ents=%u segs-per-req=%u\n",
+		dev->nodename, info->ring_size, ring_size, max_segs);
 
 	return 0;
 
@@ -312,11 +334,12 @@ again:
 
 
 static int setup_blkring(struct xenbus_device *dev,
-			 struct blkfront_info *info)
+			 struct blkfront_info *info,
+			 unsigned int old_ring_size)
 {
 	blkif_sring_t *sring;
 	int err;
-	unsigned int nr;
+	unsigned int i, nr, ring_size;
 
 	for (nr = 0; nr < info->ring_size; nr++) {
 		info->ring_refs[nr] = GRANT_INVALID_REF;
@@ -338,7 +361,54 @@ static int setup_blkring(struct xenbus_d
 	SHARED_RING_INIT(sring);
 	FRONT_RING_INIT(&info->ring, sring, nr * PAGE_SIZE);
 
-	sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+	info->sg = kcalloc(info->max_segs_per_req, sizeof(*info->sg),
+			   GFP_KERNEL);
+	if (!info->sg) {
+		err = -ENOMEM;
+		goto fail;
+	}
+	sg_init_table(info->sg, info->max_segs_per_req);
+
+	err = -ENOMEM;
+	ring_size = RING_SIZE(&info->ring);
+	if (info->max_segs_per_req > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+		if (!info->indirect_segs)
+			old_ring_size = 0;
+		if (old_ring_size < ring_size) {
+			struct blkif_request_segment **segs;
+
+			segs = krealloc(info->indirect_segs,
+					ring_size * sizeof(*segs),
+					GFP_KERNEL|__GFP_ZERO);
+			if (!segs)
+				goto fail;
+			info->indirect_segs = segs;
+		}
+		for (i = old_ring_size; i < ring_size; ++i) {
+			info->indirect_segs[i] =
+				vzalloc(BLKIF_INDIRECT_PAGES(info->max_segs_per_req)
+					* PAGE_SIZE);
+			if (!info->indirect_segs[i])
+				goto fail;
+		}
+	}
+	for (i = 0; i < ring_size; ++i) {
+		unsigned long *frame;
+
+		if (info->shadow[i].frame
+		    && ksize(info->shadow[i].frame) / sizeof(*frame)
+		       >= info->max_segs_per_req)
+			continue;
+		frame = krealloc(info->shadow[i].frame,
+				 info->max_segs_per_req * sizeof(*frame),
+				 GFP_KERNEL);
+		if (!frame)
+			goto fail;
+		if (!info->shadow[i].frame)
+			memset(frame, ~0,
+			       info->max_segs_per_req * sizeof(*frame));
+		info->shadow[i].frame = frame;
+	}
 
 	err = xenbus_multi_grant_ring(dev, nr, info->ring_pages,
 				      info->ring_refs);
@@ -700,6 +770,7 @@ static const char *op_name(unsigned int 
 		[BLKIF_OP_WRITE_BARRIER] = "barrier",
 		[BLKIF_OP_FLUSH_DISKCACHE] = "flush",
 		[BLKIF_OP_DISCARD] = "discard",
+		[BLKIF_OP_INDIRECT] = "indirect",
 	};
 
 	if (op >= ARRAY_SIZE(names))
@@ -718,6 +789,38 @@ static inline void flush_requests(struct
 		notify_remote_via_irq(info->irq);
 }
 
+/*
+ * Turn the ring slot @req into a direct request carrying the first
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST segments of the saved indirect request
+ * @copy.  @segs is the segment descriptor array belonging to @copy.
+ *
+ * The data pages are granted to the backend again and their frame numbers
+ * are recorded in the shadow entry for req->id.  req->id is stored in
+ * @copy so that the matching response can be tied back to it.
+ */
+static void split_request(struct blkif_request *req,
+			  struct blk_shadow *copy,
+			  const struct blkfront_info *info,
+			  const struct blkif_request_segment *segs)
+{
+	unsigned int i;
+
+	req->operation = copy->ind.indirect_op;
+	req->nr_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	req->handle = copy->ind.handle;
+	for (i = 0; i < BLKIF_MAX_SEGMENTS_PER_REQUEST; ++i) {
+		req->seg[i] = segs[i];
+		gnttab_grant_foreign_access_ref(segs[i].gref,
+			info->xbdev->otherend_id,
+			pfn_to_mfn(copy->frame[i]),
+			rq_data_dir(copy->request) ?
+			GTF_readonly : 0);
+		info->shadow[req->id].frame[i] = copy->frame[i];
+	}
+	/* Lets blkif_completion() find this entry by ring id. */
+	copy->ind.id = req->id;
+}
+
 static void kick_pending_request_queues(struct blkfront_info *info)
 {
 	bool queued = false;
@@ -730,22 +823,67 @@ static void kick_pending_request_queues(
 					 struct blk_resume_entry, list);
 		blkif_request_t *req =
 			RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
+		unsigned long *frame;
 		unsigned int i;
 
 		*req = ent->copy.req;
 
 		/* We get a new request id, and must reset the shadow state. */
 		req->id = GET_ID_FROM_FREELIST(info);
+		frame = info->shadow[req->id].frame;
 		info->shadow[req->id] = ent->copy;
 		info->shadow[req->id].req.id = req->id;
+		info->shadow[req->id].frame = frame;
 
 		/* Rewrite any grant references invalidated by susp/resume. */
-		for (i = 0; i < req->nr_segments; i++)
+		if (req->operation == BLKIF_OP_INDIRECT) {
+			const blkif_request_indirect_t *ind = (void *)req;
+			struct blkif_request_segment *segs =
+				 info->indirect_segs[req->id];
+
+			for (i = RING_SIZE(&info->ring); segs && i--; )
+				if (!info->indirect_segs[i]) {
+					info->indirect_segs[i] = segs;
+					segs = NULL;
+				}
+			if (segs)
+				vfree(segs);
+			segs = ent->indirect_segs;
+			info->indirect_segs[req->id] = segs;
+			if (ind->nr_segments > info->max_segs_per_req) {
+				split_request(req, &ent->copy, info, segs);
+				info->ring.req_prod_pvt++;
+				queued = true;
+				list_move_tail(&ent->list, &info->resume_split);
+				continue;
+			}
+			for (i = 0; i < BLKIF_INDIRECT_PAGES(ind->nr_segments);
+			     ++i) {
+				void *va = (void *)segs + i * PAGE_SIZE;
+				struct page *pg = vmalloc_to_page(va);
+
+				gnttab_grant_foreign_access_ref(
+					ind->indirect_grefs[i],
+					info->xbdev->otherend_id,
+					page_to_phys(pg) >> PAGE_SHIFT,
+					GTF_readonly);
+			}
+			for (i = 0; i < ind->nr_segments; ++i) {
+				gnttab_grant_foreign_access_ref(segs[i].gref,
+					info->xbdev->otherend_id,
+					pfn_to_mfn(ent->copy.frame[i]),
+					rq_data_dir(ent->copy.request) ?
+					GTF_readonly : 0);
+				frame[i] = ent->copy.frame[i];
+			}
+		} else for (i = 0; i < req->nr_segments; ++i) {
 			gnttab_grant_foreign_access_ref(req->seg[i].gref,
 				info->xbdev->otherend_id,
 				pfn_to_mfn(ent->copy.frame[i]),
 				rq_data_dir(ent->copy.request) ?
 				GTF_readonly : 0);
+			frame[i] = ent->copy.frame[i];
+		}
 
 		info->ring.req_prod_pvt++;
 		queued = true;
@@ -758,7 +896,8 @@ static void kick_pending_request_queues(
 	if (queued)
 		flush_requests(info);
 
-	if (!RING_FULL(&info->ring)) {
+	if (list_empty(&info->resume_split) &&
+	    list_empty(&info->resume_list) && !RING_FULL(&info->ring)) {
 		/* Re-enable calldowns. */
 		blk_start_queue(info->rq);
 		/* Kick things off immediately. */
@@ -964,7 +1102,7 @@ static int blkif_queue_request(struct re
 	unsigned long buffer_mfn;
 	blkif_request_t *ring_req;
 	unsigned long id;
-	unsigned int fsect, lsect;
+	unsigned int fsect, lsect, nr_segs;
 	int i, ref;
 	grant_ref_t gref_head;
 	struct scatterlist *sg;
@@ -972,13 +1110,15 @@ static int blkif_queue_request(struct re
 	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
 		return 1;
 
-	if (gnttab_alloc_grant_references(
-		BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
+	nr_segs = info->max_segs_per_req;
+	if (nr_segs > BLKIF_MAX_SEGMENTS_PER_REQUEST)
+		nr_segs += BLKIF_INDIRECT_PAGES(nr_segs);
+	if (gnttab_alloc_grant_references(nr_segs, &gref_head) < 0) {
 		gnttab_request_free_callback(
 			&info->callback,
 			blkif_restart_queue_callback,
 			info,
-			BLKIF_MAX_SEGMENTS_PER_REQUEST);
+			nr_segs);
 		return 1;
 	}
 
@@ -989,7 +1129,6 @@ static int blkif_queue_request(struct re
 
 	ring_req->id = id;
 	ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
-	ring_req->handle = info->handle;
 
 	ring_req->operation = rq_data_dir(req) ?
 		BLKIF_OP_WRITE : BLKIF_OP_READ;
@@ -1006,13 +1145,41 @@ static int blkif_queue_request(struct re
-		/* id, sector_number and handle are set above. */
+		/* id and sector_number are set above. */
 		discard->operation = BLKIF_OP_DISCARD;
 		discard->flag = 0;
+		discard->handle = info->handle;
 		discard->nr_sectors = blk_rq_sectors(req);
 		if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
 			discard->flag = BLKIF_DISCARD_SECURE;
 	} else {
-		ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
-		BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
-		for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
+		struct blkif_request_segment *segs;
+
+		nr_segs = blk_rq_map_sg(req->q, req, info->sg);
+		BUG_ON(nr_segs > info->max_segs_per_req);
+		if (nr_segs <= BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+			ring_req->nr_segments = nr_segs;
+			ring_req->handle = info->handle;
+			segs = ring_req->seg;
+		} else {
+			struct blkif_request_indirect *ind = (void *)ring_req;
+
+			ind->indirect_op = ring_req->operation;
+			ind->operation = BLKIF_OP_INDIRECT;
+			ind->nr_segments = nr_segs;
+			ind->handle = info->handle;
+			segs = info->indirect_segs[id];
+			for (i = 0; i < BLKIF_INDIRECT_PAGES(nr_segs); ++i) {
+				void *va = (void *)segs + i * PAGE_SIZE;
+				struct page *pg = vmalloc_to_page(va);
+
+				buffer_mfn = page_to_phys(pg) >> PAGE_SHIFT;
+				ref = gnttab_claim_grant_reference(&gref_head);
+				BUG_ON(ref == -ENOSPC);
+				gnttab_grant_foreign_access_ref(
+					ref, info->xbdev->otherend_id,
+					buffer_mfn, GTF_readonly);
+				ind->indirect_grefs[i] = ref;
+			}
+		}
+		for_each_sg(info->sg, sg, nr_segs, i) {
 			buffer_mfn = page_to_phys(sg_page(sg)) >> PAGE_SHIFT;
 			fsect = sg->offset >> 9;
 			lsect = fsect + (sg->length >> 9) - 1;
@@ -1027,8 +1194,7 @@ static int blkif_queue_request(struct re
 				rq_data_dir(req) ? GTF_readonly : 0 );
 
 			info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
-			ring_req->seg[i] =
-				(struct blkif_request_segment) {
+			segs[i] = (struct blkif_request_segment) {
 					.gref       = ref,
 					.first_sect = fsect,
 					.last_sect  = lsect };
@@ -1120,6 +1286,7 @@ static irqreturn_t blkif_int(int irq, vo
 	for (i = info->ring.rsp_cons; i != rp; i++) {
 		unsigned long id;
 		int ret;
+		bool done;
 
 		bret = RING_GET_RESPONSE(&info->ring, i);
 		if (unlikely(bret->id >= RING_SIZE(&info->ring))) {
@@ -1137,7 +1304,7 @@ static irqreturn_t blkif_int(int irq, vo
 		id   = bret->id;
 		req  = info->shadow[id].request;
 
-		blkif_completion(&info->shadow[id]);
+		done = blkif_completion(info, id, bret->status);
 
 		ret = ADD_ID_TO_FREELIST(info, id);
 		if (unlikely(ret)) {
@@ -1147,6 +1314,9 @@ static irqreturn_t blkif_int(int irq, vo
 			continue;
 		}
 
+		if (!done)
+			continue;
+
 		ret = bret->status == BLKIF_RSP_OKAY ? 0 : -EIO;
 		switch (bret->operation) {
 			const char *kind;
@@ -1157,7 +1327,10 @@ static irqreturn_t blkif_int(int irq, vo
 			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP))
 				ret = -EOPNOTSUPP;
 			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
-				     info->shadow[id].req.nr_segments == 0)) {
+				     !(info->shadow[id].req.operation ==
+				       BLKIF_OP_INDIRECT
+				       ? info->shadow[id].ind.nr_segments
+				       : info->shadow[id].req.nr_segments))) {
 				kind = "empty ";
 				ret = -EOPNOTSUPP;
 			}
@@ -1238,6 +1411,27 @@ static void blkif_free(struct blkfront_i
 	flush_work_sync(&info->work);
 
 	/* Free resources associated with old device channel. */
+	if (!suspend) {
+		unsigned int i;
+
+		/*
+		 * Full teardown: also drop the per-slot buffers, which are
+		 * kept when merely suspending.  Clear every pointer after
+		 * freeing it so that a repeated blkif_free() or a later
+		 * setup_blkring() never sees a stale address.
+		 */
+		if (info->indirect_segs) {
+			for (i = 0; i < RING_SIZE(&info->ring); ++i)
+				if (info->indirect_segs[i])
+					vfree(info->indirect_segs[i]);
+			kfree(info->indirect_segs);
+			info->indirect_segs = NULL;
+		}
+		for (i = 0; i < ARRAY_SIZE(info->shadow); ++i) {
+			kfree(info->shadow[i].frame);
+			info->shadow[i].frame = NULL;
+		}
+	}
 	vunmap(info->ring.sring);
 	info->ring.sring = NULL;
 	gnttab_multi_end_foreign_access(info->ring_size,
@@ -1245,16 +1431,96 @@ static void blkif_free(struct blkfront_i
 	if (info->irq)
 		unbind_from_irqhandler(info->irq, info);
 	info->irq = 0;
+	kfree(info->sg);
+	info->sg = NULL;
+	info->max_segs_per_req = 0;
 }
 
-static void blkif_completion(struct blk_shadow *s)
+/*
+ * Finish the ring-level handling of the response for shadow slot @id.
+ *
+ * Ends foreign access for the grants the request used.  Returns true when
+ * the caller may go on and complete the block layer request.  Returns false
+ * when @id carried only the leading part of an indirect request that had
+ * been split (entry found on info->resume_split) and that part succeeded;
+ * the remainder is then queued on info->resume_list for resubmission.
+ */
+static bool blkif_completion(struct blkfront_info *info, unsigned long id,
+			     int status)
 {
-	int i;
+	struct blk_shadow *s = &info->shadow[id];
 
-	if (s->req.operation == BLKIF_OP_DISCARD)
-		return;
-	for (i = 0; i < s->req.nr_segments; i++)
-		gnttab_end_foreign_access(s->req.seg[i].gref, 0UL);
+	switch (s->req.operation) {
+		unsigned int i, j;
+
+	case BLKIF_OP_DISCARD:
+		/* Nothing is released here for discard requests. */
+		break;
+
+	case BLKIF_OP_INDIRECT: {
+		struct blkif_request_segment *segs = info->indirect_segs[id];
+		struct blk_resume_entry *tmp, *ent = NULL;
+
+		/* Was this slot used for a piece of a split request? */
+		list_for_each_entry(tmp, &info->resume_split, list)
+			if (tmp->copy.ind.id == id) {
+				ent = tmp;
+				__list_del_entry(&ent->list);
+				break;
+			}
+		if (!ent) {
+			/* Not split: release data and descriptor grants. */
+			for (i = 0; i < s->ind.nr_segments; i++)
+				gnttab_end_foreign_access(segs[i].gref, 0UL);
+			gnttab_multi_end_foreign_access(
+				BLKIF_INDIRECT_PAGES(s->ind.nr_segments),
+				s->ind.indirect_grefs, NULL);
+			break;
+		}
+		/* A split piece always carried a full set of direct segments. */
+		for (i = 0; i < BLKIF_MAX_SEGMENTS_PER_REQUEST; i++)
+			gnttab_end_foreign_access(segs[i].gref, 0UL);
+		if (status != BLKIF_RSP_OKAY) {
+			/* Drop the remainder; returning true ends the request. */
+			kfree(ent);
+			break;
+		}
+		/*
+		 * The remainder begins right behind the sectors this piece
+		 * covered, so advance the start sector before segs[] gets
+		 * shifted down.  NOTE(review): this uses the direct-request
+		 * view of the union; like the conversion further down it
+		 * relies on sector_number being at the same offset in both
+		 * request layouts -- confirm against blkif.h.
+		 */
+		for (i = 0; i < BLKIF_MAX_SEGMENTS_PER_REQUEST; i++)
+			ent->copy.req.sector_number +=
+				segs[i].last_sect - segs[i].first_sect + 1;
+		/* Move the not yet submitted segments and frames to the front. */
+		for (j = 0; i < ent->copy.ind.nr_segments; ++i, ++j)
+			segs[j] = segs[i];
+		ent->copy.frame += BLKIF_MAX_SEGMENTS_PER_REQUEST;
+		ent->copy.ind.nr_segments -= BLKIF_MAX_SEGMENTS_PER_REQUEST;
+		if (ent->copy.ind.nr_segments
+		    <= BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+			/* What is left fits into a direct request: convert. */
+			ent->copy.req.operation = ent->copy.ind.indirect_op;
+			ent->copy.req.nr_segments = ent->copy.ind.nr_segments;
+			ent->copy.req.handle = ent->copy.ind.handle;
+			for (i = 0; i < ent->copy.req.nr_segments; ++i)
+				ent->copy.req.seg[i] = segs[i];
+		}
+		list_add_tail(&ent->list, &info->resume_list);
+		return false;
+	}
+
+	default:
+		for (i = 0; i < s->req.nr_segments; i++)
+			gnttab_end_foreign_access(s->req.seg[i].gref, 0UL);
+		break;
+	}
+
+	return true;
 }
 
 static int blkif_recover(struct blkfront_info *info,
@@ -1267,14 +1506,35 @@ static int blkif_recover(struct blkfront
 
 	/* Stage 1: Make a safe copy of the shadow state. */
 	for (i = 0; i < old_ring_size; i++) {
+		unsigned int nr_segs;
+
 		/* Not in use? */
 		if (!info->shadow[i].request)
 			continue;
-		ent = kmalloc(sizeof(*ent),
+		switch (info->shadow[i].req.operation) {
+		default:
+			nr_segs = info->shadow[i].req.nr_segments;
+			break;
+		case BLKIF_OP_INDIRECT:
+			nr_segs = info->shadow[i].ind.nr_segments;
+			break;
+		case BLKIF_OP_DISCARD:
+			nr_segs = 0;
+			break;
+		}
+		ent = kmalloc(sizeof(*ent) + nr_segs * sizeof(*ent->copy.frame),
 			      GFP_NOIO | __GFP_NOFAIL | __GFP_HIGH);
 		if (!ent)
 			break;
 		ent->copy = info->shadow[i];
+		ent->copy.frame = (void *)(ent + 1);
+		memcpy(ent->copy.frame, info->shadow[i].frame,
+		       nr_segs * sizeof(*ent->copy.frame));
+		if (ent->copy.req.operation == BLKIF_OP_INDIRECT) {
+			ent->indirect_segs = info->indirect_segs[i];
+			info->indirect_segs[i] = NULL;
+		} else
+			ent->indirect_segs = NULL;
 		list_add_tail(&ent->list, &list);
 	}
 	if (i < old_ring_size) {
@@ -1289,7 +1549,13 @@ static int blkif_recover(struct blkfront
 	list_splice_tail(&list, &info->resume_list);
 
 	/* Stage 2: Set up free list. */
-	memset(&info->shadow, 0, sizeof(info->shadow));
+	for (i = 0; i < old_ring_size; ++i) {
+		unsigned long *frame = info->shadow[i].frame;
+
+		memset(info->shadow + i, 0, sizeof(*info->shadow));
+		memset(frame, ~0, info->max_segs_per_req * sizeof(*frame));
+		info->shadow[i].frame = frame;
+	}
 	shadow_init(info->shadow, ring_size);
 	info->shadow_free = info->ring.req_prod_pvt;
 
@@ -1305,6 +1571,13 @@ static int blkif_recover(struct blkfront
 
 	spin_unlock_irq(&info->io_lock);
 
+	if (info->indirect_segs)
+		for (i = ring_size; i < old_ring_size; ++i)
+			if (info->indirect_segs[i]) {
+				vfree(info->indirect_segs[i]);
+				info->indirect_segs[i] = NULL;
+			}
+
 	return 0;
 }
 
--- a/drivers/xen/blkfront/block.h
+++ b/drivers/xen/blkfront/block.h
@@ -76,9 +76,12 @@ struct xlbd_major_info
 };
 
 struct blk_shadow {
-	blkif_request_t req;
+	union {
+		blkif_request_t req;
+		blkif_request_indirect_t ind;
+	};
 	struct request *request;
-	unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	unsigned long *frame;
 };
 
 #define BLK_MAX_RING_PAGE_ORDER 4U
@@ -102,14 +105,16 @@ struct blkfront_info
 	unsigned int ring_size;
 	blkif_front_ring_t ring;
 	spinlock_t io_lock;
-	struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct scatterlist *sg;
+	struct blkif_request_segment **indirect_segs;
 	unsigned int irq;
+	unsigned int max_segs_per_req;
 	struct xlbd_major_info *mi;
 	struct request_queue *rq;
 	struct work_struct work;
 	struct gnttab_free_callback callback;
 	struct blk_shadow shadow[BLK_MAX_RING_SIZE];
-	struct list_head resume_list;
+	struct list_head resume_list, resume_split;
 	grant_ref_t ring_refs[BLK_MAX_RING_PAGES];
 	struct page *ring_pages[BLK_MAX_RING_PAGES];
 	unsigned long shadow_free;
--- a/drivers/xen/blkfront/vbd.c
+++ b/drivers/xen/blkfront/vbd.c
@@ -378,14 +378,15 @@ xlvbd_init_blk_queue(struct gendisk *gd,
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
 	blk_queue_logical_block_size(rq, sector_size);
 	blk_queue_physical_block_size(rq, physical_sector_size);
-	blk_queue_max_hw_sectors(rq, 512);
+	blk_queue_max_hw_sectors(rq,
+				 info->max_segs_per_req << (PAGE_SHIFT - 9));
 
 	/* Each segment in a request is up to an aligned page in size. */
 	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
 	blk_queue_max_segment_size(rq, PAGE_SIZE);
 
 	/* Ensure a merged request will fit in a single I/O ring slot. */
-	blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+	blk_queue_max_segments(rq, info->max_segs_per_req);
 
 	/* Make sure buffer addresses are sector-aligned. */
 	blk_queue_dma_alignment(rq, 511);
