From: jbeulich@suse.com
Subject: vscsi: support larger transfer sizes
Patch-mainline: Never, SUSE-Xen specific
References: bnc#774818

--- a/drivers/xen/scsiback/common.h
+++ b/drivers/xen/scsiback/common.h
@@ -95,10 +95,13 @@ struct vscsibk_info {
 	unsigned int waiting_reqs;
 	struct page **mmap_pages;
 
+	struct pending_req *preq;
+
+	struct gnttab_map_grant_ref   *gmap;
+	struct gnttab_unmap_grant_ref *gunmap;
 };
 
-typedef struct {
-	unsigned char act;
+typedef struct pending_req {
 	struct vscsibk_info *info;
 	struct scsi_device *sdev;
 
@@ -115,7 +118,8 @@ typedef struct {
 	
 	uint32_t request_bufflen;
 	struct scatterlist *sgl;
-	grant_ref_t gref[VSCSIIF_SG_TABLESIZE];
+	grant_ref_t *gref;
+	struct scsiif_request_segment *segs;
 
 	int32_t rslt;
 	uint32_t resid;
@@ -124,7 +128,7 @@ typedef struct {
 	struct list_head free_list;
 } pending_req_t;
 
-
+extern unsigned int vscsiif_segs;
 
 #define scsiback_get(_b) (atomic_inc(&(_b)->nr_unreplied_reqs))
 #define scsiback_put(_b)				\
@@ -164,7 +168,7 @@ void scsiback_release_translation_entry(
 
 int scsiback_cmd_exec(pending_req_t *pending_req);
 void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
-			uint32_t resid, pending_req_t *pending_req);
+			uint32_t resid, pending_req_t *, uint8_t act);
 void scsiback_fast_flush_area(pending_req_t *req);
 
 void scsiback_rsp_emulation(pending_req_t *pending_req);
--- a/drivers/xen/scsiback/emulate.c
+++ b/drivers/xen/scsiback/emulate.c
@@ -351,7 +351,7 @@ int scsiback_req_emulation_or_cmdexec(pe
 	scsiback_fast_flush_area(pending_req);
 	scsiback_do_resp_with_sense(pending_req->sense_buffer,
 				    pending_req->rslt, pending_req->resid,
-				    pending_req);
+				    pending_req, VSCSIIF_ACT_SCSI_CDB);
 	return 0;
 }
 
--- a/drivers/xen/scsiback/interface.c
+++ b/drivers/xen/scsiback/interface.c
@@ -51,6 +51,18 @@ struct vscsibk_info *vscsibk_info_alloc(
 	if (!info)
 		return ERR_PTR(-ENOMEM);
 
+	/*
+	 * Per-interface scratch arrays for grant map/unmap operations, one
+	 * entry per segment a request may carry (vscsiif_segs).
+	 */
+	info->gmap = kcalloc(vscsiif_segs, sizeof(*info->gmap), GFP_KERNEL);
+	info->gunmap = kcalloc(vscsiif_segs, sizeof(*info->gunmap), GFP_KERNEL);
+	if (!info->gmap || !info->gunmap) {
+		/* scsiback_free() kfree()s both arrays; kfree(NULL) is a no-op. */
+		scsiback_free(info);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	info->domid = domid;
 	spin_lock_init(&info->ring_lock);
 	atomic_set(&info->nr_unreplied_reqs, 0);
@@ -122,6 +129,9 @@ void scsiback_disconnect(struct vscsibk_
 
 void scsiback_free(struct vscsibk_info *info)
 {
+	/* Release the grant map/unmap arrays, then the info object itself. */
+	kfree(info->gunmap);
+	kfree(info->gmap);
 	kmem_cache_free(scsiback_cachep, info);
 }
 
--- a/drivers/xen/scsiback/scsiback.c
+++ b/drivers/xen/scsiback/scsiback.c
@@ -56,6 +56,10 @@ static unsigned int vscsiif_reqs = 128;
 module_param_named(reqs, vscsiif_reqs, uint, 0);
 MODULE_PARM_DESC(reqs, "Number of scsiback requests to allocate");
 
+unsigned int vscsiif_segs = VSCSIIF_SG_TABLESIZE;
+module_param_named(segs, vscsiif_segs, uint, 0);
+MODULE_PARM_DESC(segs, "Number of segments to allow per request");
+
 static int log_print_stat;
 module_param(log_print_stat, bool, 0644);
 
@@ -67,7 +71,8 @@ static grant_handle_t *pending_grant_han
 
 static int vaddr_pagenr(pending_req_t *req, int seg)
 {
-	return (req - pending_reqs) * VSCSIIF_SG_TABLESIZE + seg;
+	/* Request index times vscsiif_segs, plus the segment number. */
+	return seg + (req - pending_reqs) * vscsiif_segs;
 }
 
 static unsigned long vaddr(pending_req_t *req, int seg)
@@ -82,7 +86,7 @@ static unsigned long vaddr(pending_req_t
 
 void scsiback_fast_flush_area(pending_req_t *req)
 {
-	struct gnttab_unmap_grant_ref unmap[VSCSIIF_SG_TABLESIZE];
+	struct gnttab_unmap_grant_ref *unmap = req->info->gunmap;
 	unsigned int i, invcount = 0;
 	grant_handle_t handle;
 	int err;
@@ -117,6 +121,7 @@ static pending_req_t * alloc_req(struct
 	if (!list_empty(&pending_free)) {
 		req = list_entry(pending_free.next, pending_req_t, free_list);
 		list_del(&req->free_list);
+		req->nr_segments = 0;
 	}
 	spin_unlock_irqrestore(&pending_free_lock, flags);
 	return req;
@@ -144,7 +149,8 @@ static void scsiback_notify_work(struct
 }
 
 void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
-			uint32_t resid, pending_req_t *pending_req)
+				 uint32_t resid, pending_req_t *pending_req,
+				 uint8_t act)
 {
 	vscsiif_response_t *ring_res;
 	struct vscsibk_info *info = pending_req->info;
@@ -159,6 +165,7 @@ void scsiback_do_resp_with_sense(char *s
 	ring_res = RING_GET_RESPONSE(&info->ring, info->ring.rsp_prod_pvt);
 	info->ring.rsp_prod_pvt++;
 
+	ring_res->act    = act;
 	ring_res->rslt   = result;
 	ring_res->rqid   = pending_req->rqid;
 
@@ -186,7 +193,8 @@ void scsiback_do_resp_with_sense(char *s
 	if (notify)
 		notify_remote_via_irq(info->irq);
 
-	free_req(pending_req);
+	if (act != VSCSIIF_ACT_SCSI_SG_PRESET)
+		free_req(pending_req);
 }
 
 static void scsiback_print_status(char *sense_buffer, int errors,
@@ -223,25 +231,25 @@ static void scsiback_cmd_done(struct req
 		scsiback_rsp_emulation(pending_req);
 
 	scsiback_fast_flush_area(pending_req);
-	scsiback_do_resp_with_sense(sense_buffer, errors, resid, pending_req);
+	scsiback_do_resp_with_sense(sense_buffer, errors, resid, pending_req,
+				    VSCSIIF_ACT_SCSI_CDB);
 	scsiback_put(pending_req->info);
 
 	__blk_put_request(req->q, req);
 }
 
 
-static int scsiback_gnttab_data_map(vscsiif_request_t *ring_req,
-					pending_req_t *pending_req)
+static int scsiback_gnttab_data_map(const struct scsiif_request_segment *segs,
+				    unsigned int nr_segs,
+				    pending_req_t *pending_req)
 {
 	u32 flags;
-	int write;
-	int i, err = 0;
-	unsigned int data_len = 0;
-	struct gnttab_map_grant_ref map[VSCSIIF_SG_TABLESIZE];
+	int write, err = 0;
+	unsigned int i, j, data_len = 0;
 	struct vscsibk_info *info   = pending_req->info;
-
+	struct gnttab_map_grant_ref *map = info->gmap;
 	int data_dir = (int)pending_req->sc_data_direction;
-	unsigned int nr_segments = (unsigned int)pending_req->nr_segments;
+	unsigned int nr_segments = pending_req->nr_segments + nr_segs;
 
 	write = (data_dir == DMA_TO_DEVICE);
 
@@ -262,14 +270,20 @@ static int scsiback_gnttab_data_map(vscs
 		if (write)
 			flags |= GNTMAP_readonly;
 
-		for (i = 0; i < nr_segments; i++)
+		for (i = 0; i < pending_req->nr_segments; i++)
+			gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
+						pending_req->segs[i].gref,
+						info->domid);
+		for (j = 0; i < nr_segments; i++, j++)
 			gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
-						ring_req->seg[i].gref,
+						segs[j].gref,
 						info->domid);
 
+
 		err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nr_segments);
 		BUG_ON(err);
 
+		j = 0;
 		for_each_sg (pending_req->sgl, sg, nr_segments, i) {
 			struct page *pg;
 
@@ -292,8 +306,15 @@ static int scsiback_gnttab_data_map(vscs
 			set_phys_to_machine(page_to_pfn(pg),
 				FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
 
-			sg_set_page(sg, pg, ring_req->seg[i].length,
-				    ring_req->seg[i].offset);
+			if (i < pending_req->nr_segments)
+				sg_set_page(sg, pg,
+					    pending_req->segs[i].length,
+					    pending_req->segs[i].offset);
+			else {
+				sg_set_page(sg, pg, segs[j].length,
+					    segs[j].offset);
+				++j;
+			}
 			data_len += sg->length;
 
 			barrier();
@@ -304,6 +325,8 @@ static int scsiback_gnttab_data_map(vscs
 
 		}
 
+		pending_req->nr_segments = nr_segments;
+
 		if (err)
 			goto fail_flush;
 	}
@@ -475,7 +498,8 @@ static void scsiback_device_reset_exec(p
 	scsiback_get(info);
 	err = scsi_reset_provider(sdev, SCSI_TRY_RESET_DEVICE);
 
-	scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
+	scsiback_do_resp_with_sense(NULL, err, 0, pending_req,
+				    VSCSIIF_ACT_SCSI_RESET);
 	scsiback_put(info);
 
 	return;
@@ -493,13 +517,11 @@ static int prepare_pending_reqs(struct v
 {
 	struct scsi_device *sdev;
 	struct ids_tuple vir;
+	unsigned int nr_segs;
 	int err = -EINVAL;
 
 	DPRINTK("%s\n",__FUNCTION__);
 
-	pending_req->rqid       = ring_req->rqid;
-	pending_req->act        = ring_req->act;
-
 	pending_req->info       = info;
 
 	vir.chn = ring_req->channel;
@@ -531,11 +553,18 @@ static int prepare_pending_reqs(struct v
 		goto invalid_value;
 	}
 
-	pending_req->nr_segments = ring_req->nr_segments;
+	/* Read the count once so the checks and the mapping use one value. */
+	nr_segs = ring_req->nr_segments;
 	barrier();
-	if (pending_req->nr_segments > VSCSIIF_SG_TABLESIZE) {
-		DPRINTK("scsiback: invalid parameter nr_seg = %d\n",
-			pending_req->nr_segments);
+	/*
+	 * Bound nr_segs by VSCSIIF_SG_TABLESIZE, the limit the previous check
+	 * applied to ring_req->seg[], in addition to the per-request total.
+	 * With "segs" above VSCSIIF_SG_TABLESIZE the total check alone would
+	 * let the mapping code index ring_req->seg[] past that limit.
+	 */
+	if (nr_segs > VSCSIIF_SG_TABLESIZE ||
+	    pending_req->nr_segments + nr_segs > vscsiif_segs) {
+		DPRINTK("scsiback: invalid nr_segs = %u\n", nr_segs);
 		err = -EINVAL;
 		goto invalid_value;
 	}
@@ -552,7 +573,7 @@ static int prepare_pending_reqs(struct v
 	
 	pending_req->timeout_per_command = ring_req->timeout_per_command;
 
-	if(scsiback_gnttab_data_map(ring_req, pending_req)) {
+	if (scsiback_gnttab_data_map(ring_req->seg, nr_segs, pending_req)) {
 		DPRINTK("scsiback: invalid buffer\n");
 		err = -EINVAL;
 		goto invalid_value;
@@ -564,6 +585,33 @@ invalid_value:
 	return err;
 }
 
+/*
+ * Append the segments carried by one VSCSIIF_ACT_SCSI_SG_PRESET request to
+ * the private copy in pending_req->segs.
+ *
+ * The batch is dropped (and only logged via DPRINTK) when its count exceeds
+ * VSCSIIF_SG_LIST_SIZE or when the accumulated total would exceed
+ * vscsiif_segs; pending_req is left unchanged in that case.
+ */
+static void latch_segments(pending_req_t *pending_req,
+			   const struct vscsiif_sg_list *sgl)
+{
+	/* Read the count once so the check and the copy use the same value. */
+	unsigned int nr_segs = sgl->nr_segments;
+
+	barrier();
+	if (nr_segs > VSCSIIF_SG_LIST_SIZE ||
+	    pending_req->nr_segments + nr_segs > vscsiif_segs) {
+		DPRINTK("scsiback: invalid nr_segs = %u\n", nr_segs);
+		return;
+	}
+
+	memcpy(pending_req->segs + pending_req->nr_segments,
+	       sgl->seg, nr_segs * sizeof(*sgl->seg));
+	/* Keep the count update from being moved ahead of the copy. */
+	barrier();
+	pending_req->nr_segments += nr_segs;
+}
 
 static int _scsiback_do_cmd_fn(struct vscsibk_info *info)
 {
@@ -590,9 +625,11 @@ static int _scsiback_do_cmd_fn(struct vs
 	}
 
 	while ((rc != rp)) {
+		int act, rqid;
+
 		if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
 			break;
-		pending_req = alloc_req(info);
+		pending_req = info->preq ?: alloc_req(info);
 		if (NULL == pending_req) {
 			more_to_do = 1;
 			break;
@@ -601,9 +638,37 @@ static int _scsiback_do_cmd_fn(struct vs
 		ring_req = RING_GET_REQUEST(ring, rc);
 		ring->req_cons = ++rc;
 
-		err = prepare_pending_reqs(info, ring_req,
-						pending_req);
-		switch (err ?: pending_req->act) {
+		/* Read both fields once, ahead of the checks that use them. */
+		act = ring_req->act;
+		rqid = ring_req->rqid;
+		barrier();
+		/*
+		 * Bind the request to this interface right away: the
+		 * SG_PRESET and rqid-mismatch paths below respond without
+		 * calling prepare_pending_reqs(), and
+		 * scsiback_do_resp_with_sense() takes the ring from
+		 * pending_req->info.
+		 */
+		pending_req->info = info;
+		/* A request with no latched segments starts a new command. */
+		if (!pending_req->nr_segments)
+			pending_req->rqid = rqid;
+		else if (pending_req->rqid != rqid)
+			DPRINTK("scsiback: invalid rqid %04x, expected %04x\n",
+				rqid, pending_req->rqid);
+
+		/* Only the SG_PRESET branch below re-latches the request. */
+		info->preq = NULL;
+		if (pending_req->rqid != rqid)
+			err = -EINVAL;
+		else if (act == VSCSIIF_ACT_SCSI_SG_PRESET) {
+			latch_segments(pending_req, (void *)ring_req);
+			info->preq = pending_req;
+			err = 0;
+		} else
+			err = prepare_pending_reqs(info, ring_req,
+						   pending_req);
+		switch (err ?: act) {
 		case VSCSIIF_ACT_SCSI_CDB:
 			/* The Host mode is through as for Emulation. */
 			if (info->feature == VSCSI_TYPE_HOST ?
@@ -612,7 +666,8 @@ static int _scsiback_do_cmd_fn(struct vs
 				scsiback_fast_flush_area(pending_req);
 				scsiback_do_resp_with_sense(NULL,
 							    DRIVER_ERROR << 24,
-							    0, pending_req);
+							    0, pending_req,
+							    act);
 			}
 			break;
 		case VSCSIIF_ACT_SCSI_RESET:
@@ -620,19 +675,35 @@ static int _scsiback_do_cmd_fn(struct vs
 			scsiback_fast_flush_area(pending_req);
 			scsiback_device_reset_exec(pending_req);
 			break;
+		case VSCSIIF_ACT_SCSI_SG_PRESET:
+			scsiback_do_resp_with_sense(NULL, 0, 0, pending_req,
+						    act);
+			break;
 		default:
 			if(!err) {
 				scsiback_fast_flush_area(pending_req);
 				if (printk_ratelimit())
 					pr_err("scsiback: invalid request %#x\n",
-					       pending_req->act);
+					       act);
 			}
 			scsiback_do_resp_with_sense(NULL, DRIVER_ERROR << 24,
-						    0, pending_req);
+						    0, pending_req, act);
+			break;
+		case -EINVAL:
+			scsiback_do_resp_with_sense(NULL, DRIVER_INVALID << 24,
+						    0, pending_req, act);
+			/*
+			 * scsiback_do_resp_with_sense() does not free
+			 * SG_PRESET requests, and a rejected one is no
+			 * longer held in info->preq: free it here so it
+			 * is not lost from the free list.
+			 */
+			if (act == VSCSIIF_ACT_SCSI_SG_PRESET)
+				free_req(pending_req);
 			break;
 		case -ENODEV:
 			scsiback_do_resp_with_sense(NULL, DID_NO_CONNECT << 16,
-						    0, pending_req);
+						    0, pending_req, act);
 			break;
 		}
 
@@ -704,17 +767,37 @@ static int __init scsiback_init(void)
 	if (!is_running_on_xen())
 		return -ENODEV;
 
-	mmap_pages = vscsiif_reqs * VSCSIIF_SG_TABLESIZE;
+	/* Raise "segs" to at least VSCSIIF_SG_TABLESIZE ... */
+	if (vscsiif_segs < VSCSIIF_SG_TABLESIZE)
+		vscsiif_segs = VSCSIIF_SG_TABLESIZE;
+	/* ... and refuse to load if it does not fit in a uint8_t. */
+	if (vscsiif_segs != (uint8_t)vscsiif_segs)
+		return -EINVAL;
+	mmap_pages = vscsiif_reqs * vscsiif_segs;
 
 	pending_reqs          = kzalloc(sizeof(pending_reqs[0]) *
 					vscsiif_reqs, GFP_KERNEL);
+	if (!pending_reqs)
+		return -ENOMEM;
 	pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
 					mmap_pages, GFP_KERNEL);
 	pending_pages         = alloc_empty_pages_and_pagevec(mmap_pages);
 
-	if (!pending_reqs || !pending_grant_handles || !pending_pages)
+	if (!pending_grant_handles || !pending_pages)
 		goto out_of_memory;
 
+	/* Per-request arrays, vscsiif_segs entries each. */
+	for (i = 0; i < vscsiif_reqs; ++i) {
+		pending_reqs[i].gref = kcalloc(vscsiif_segs,
+					       sizeof(*pending_reqs->gref),
+					       GFP_KERNEL);
+		pending_reqs[i].segs = kcalloc(vscsiif_segs,
+					       sizeof(*pending_reqs->segs),
+					       GFP_KERNEL);
+		if (!pending_reqs[i].gref || !pending_reqs[i].segs)
+			goto out_of_memory;
+	}
+
 	for (i = 0; i < mmap_pages; i++)
 		pending_grant_handles[i] = SCSIBACK_INVALID_HANDLE;
 
@@ -736,6 +814,10 @@ static int __init scsiback_init(void)
 out_interface:
 	scsiback_interface_exit();
 out_of_memory:
+	for (i = 0; i < vscsiif_reqs; ++i) {
+		kfree(pending_reqs[i].gref);
+		kfree(pending_reqs[i].segs);
+	}
 	kfree(pending_reqs);
 	kfree(pending_grant_handles);
 	free_empty_pages_and_pagevec(pending_pages, mmap_pages);
@@ -746,12 +828,17 @@ out_of_memory:
 #if 0
 static void __exit scsiback_exit(void)
 {
+	unsigned int i;
+
 	scsiback_xenbus_unregister();
 	scsiback_interface_exit();
+	for (i = 0; i < vscsiif_reqs; ++i) {
+		kfree(pending_reqs[i].gref);
+		kfree(pending_reqs[i].segs);
+	}
 	kfree(pending_reqs);
 	kfree(pending_grant_handles);
-	free_empty_pages_and_pagevec(pending_pages, (vscsiif_reqs * VSCSIIF_SG_TABLESIZE));
-
+	free_empty_pages_and_pagevec(pending_pages, vscsiif_reqs * vscsiif_segs);
 }
 #endif
 
--- a/drivers/xen/scsiback/xenbus.c
+++ b/drivers/xen/scsiback/xenbus.c
@@ -352,6 +352,13 @@ static int scsiback_probe(struct xenbus_
 	if (val)
 		be->info->feature = VSCSI_TYPE_HOST;
 
+	if (vscsiif_segs > VSCSIIF_SG_TABLESIZE) {
+		err = xenbus_printf(XBT_NIL, dev->nodename, "segs-per-req",
+				    "%u", vscsiif_segs);
+		if (err)
+			xenbus_dev_error(dev, err, "writing segs-per-req");
+	}
+
 	err = xenbus_switch_state(dev, XenbusStateInitWait);
 	if (err)
 		goto fail;
--- a/drivers/xen/scsifront/common.h
+++ b/drivers/xen/scsifront/common.h
@@ -93,7 +93,7 @@ struct vscsifrnt_shadow {
 
 	/* requested struct scsi_cmnd is stored from kernel */
 	struct scsi_cmnd *sc;
-	int gref[VSCSIIF_SG_TABLESIZE];
+	int gref[SG_ALL];
 };
 
 struct vscsifrnt_info {
@@ -107,7 +107,6 @@ struct vscsifrnt_info {
 
 	grant_ref_t ring_ref;
 	struct vscsiif_front_ring ring;
-	struct vscsiif_response	ring_res;
 
 	struct vscsifrnt_shadow shadow[VSCSIIF_MAX_REQS];
 	uint32_t shadow_free;
@@ -121,6 +120,13 @@ struct vscsifrnt_info {
 	unsigned char waiting_pause;
 	unsigned char pause;
 	unsigned callers;
+
+	struct {
+		struct scsi_cmnd *sc;
+		unsigned int rqid;
+		unsigned int done;
+		struct scsiif_request_segment segs[];
+	} active;
 };
 
 #define DPRINTK(_f, _a...)				\
--- a/drivers/xen/scsifront/scsifront.c
+++ b/drivers/xen/scsifront/scsifront.c
@@ -110,6 +110,86 @@ irqreturn_t scsifront_intr(int irq, void
 	return IRQ_HANDLED;
 }
 
+/*
+ * Write the ring requests for the command recorded in info->active.
+ *
+ * While more than VSCSIIF_SG_TABLESIZE segments are left, they are sent in
+ * VSCSIIF_ACT_SCSI_SG_PRESET requests of at most VSCSIIF_SG_LIST_SIZE entries
+ * each; the rest goes into the closing VSCSIIF_ACT_SCSI_CDB request.  Every
+ * request of the command carries info->active.rqid.
+ *
+ * @ring_req: slot the caller already claimed, or NULL to take the next one
+ *            from the ring.
+ *
+ * Returns false when the ring is full on exit: either the command is only
+ * partly queued (info->active.sc is still set, call again with a NULL
+ * ring_req once there is room) or the CDB request used the last slot.
+ * Returns true otherwise.
+ */
+static bool push_cmd_to_ring(struct vscsifrnt_info *info,
+			     vscsiif_request_t *ring_req)
+{
+	unsigned int left, rqid = info->active.rqid;
+	struct scsi_cmnd *sc;
+
+	/* Only the first pass may use the caller's slot. */
+	for (; ; ring_req = NULL) {
+		struct vscsiif_sg_list *sgl;
+
+		if (!ring_req) {
+			struct vscsiif_front_ring *ring = &info->ring;
+
+			ring_req = RING_GET_REQUEST(ring, ring->req_prod_pvt);
+			ring->req_prod_pvt++;
+			ring_req->rqid = rqid;
+		}
+
+		/* Stop once the remainder fits into the CDB request. */
+		left = info->shadow[rqid].nr_segments - info->active.done;
+		if (left <= VSCSIIF_SG_TABLESIZE)
+			break;
+
+		/* The slot is filled in through the sg-list layout instead. */
+		sgl = (void *)ring_req;
+		sgl->act = VSCSIIF_ACT_SCSI_SG_PRESET;
+
+		if (left > VSCSIIF_SG_LIST_SIZE)
+			left = VSCSIIF_SG_LIST_SIZE;
+		memcpy(sgl->seg, info->active.segs + info->active.done,
+		       left * sizeof(*sgl->seg));
+
+		sgl->nr_segments = left;
+		info->active.done += left;
+
+		if (RING_FULL(&info->ring))
+			return false;
+	}
+
+	sc = info->active.sc;
+
+	ring_req->act     = VSCSIIF_ACT_SCSI_CDB;
+	ring_req->id      = sc->device->id;
+	ring_req->lun     = sc->device->lun;
+	ring_req->channel = sc->device->channel;
+	ring_req->cmd_len = sc->cmd_len;
+
+	if (sc->cmd_len)
+		memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
+	else
+		memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);
+
+	ring_req->sc_data_direction   = sc->sc_data_direction;
+	ring_req->timeout_per_command = sc->request->timeout / HZ;
+	ring_req->nr_segments         = left;
+
+	memcpy(ring_req->seg, info->active.segs + info->active.done,
+	       left * sizeof(*ring_req->seg));
+
+	/* The command is fully queued; nothing is left to resume. */
+	info->active.sc = NULL;
+
+	return !RING_FULL(&info->ring);
+}
 
 static void scsifront_gnttab_done(struct vscsifrnt_info *info, uint32_t id)
 {
@@ -192,6 +252,16 @@ static void scsifront_sync_cmd_done(stru
 static void scsifront_do_response(struct vscsifrnt_info *info,
 				  vscsiif_response_t *ring_res)
 {
+	if (info->host->sg_tablesize > VSCSIIF_SG_TABLESIZE) {
+		u8 act = ring_res->act;
+
+		if (act == VSCSIIF_ACT_SCSI_SG_PRESET)
+			return;
+		if (act != info->shadow[ring_res->rqid].act)
+			DPRINTK("Bogus backend response (%02x vs %02x)\n",
+				act, info->shadow[ring_res->rqid].act);
+	}
+
 	if (info->shadow[ring_res->rqid].act == VSCSIIF_ACT_SCSI_CDB)
 		scsifront_cdb_cmd_done(info, ring_res);
 	else
@@ -231,6 +301,11 @@ static int scsifront_cmd_done(struct vsc
 
 	more_to_do = scsifront_ring_drain(info);
 
+	if (info->active.sc && !RING_FULL(&info->ring)) {
+		push_cmd_to_ring(info, NULL);
+		scsifront_do_request(info);
+	}
+
 	info->waiting_sync = 0;
 
 	spin_unlock_irqrestore(info->host->host_lock, flags);
@@ -283,7 +358,8 @@ int scsifront_schedule(void *data)
 
 
 static int map_data_for_request(struct vscsifrnt_info *info,
-		struct scsi_cmnd *sc, vscsiif_request_t *ring_req, uint32_t id)
+				struct scsi_cmnd *sc,
+				struct vscsifrnt_shadow *shadow)
 {
 	grant_ref_t gref_head;
 	struct page *page;
@@ -295,7 +371,7 @@ static int map_data_for_request(struct v
 	if (sc->sc_data_direction == DMA_NONE || !data_len)
 		return 0;
 
-	err = gnttab_alloc_grant_references(VSCSIIF_SG_TABLESIZE, &gref_head);
+	err = gnttab_alloc_grant_references(info->host->sg_tablesize, &gref_head);
 	if (err) {
 		shost_printk(PREFIX(ERR), info->host,
 			     "gnttab_alloc_grant_references() error\n");
@@ -304,7 +380,7 @@ static int map_data_for_request(struct v
 
 	/* quoted scsi_lib.c/scsi_req_map_sg . */
 	nr_pages = PFN_UP(data_len + scsi_sglist(sc)->offset);
-	if (nr_pages > VSCSIIF_SG_TABLESIZE) {
+	if (nr_pages > info->host->sg_tablesize) {
 		shost_printk(PREFIX(ERR), info->host,
 			     "Unable to map request_buffer for command!\n");
 		ref_cnt = -E2BIG;
@@ -331,10 +407,10 @@ static int map_data_for_request(struct v
 				gnttab_grant_foreign_access_ref(ref, info->dev->otherend_id,
 					page_to_phys(page) >> PAGE_SHIFT, write);
 
-				info->shadow[id].gref[ref_cnt]  = ref;
-				ring_req->seg[ref_cnt].gref     = ref;
-				ring_req->seg[ref_cnt].offset   = (uint16_t)off;
-				ring_req->seg[ref_cnt].length   = (uint16_t)bytes;
+				shadow->gref[ref_cnt] = ref;
+				info->active.segs[ref_cnt].gref   = ref;
+				info->active.segs[ref_cnt].offset = off;
+				info->active.segs[ref_cnt].length = bytes;
 
 				page++;
 				len -= bytes;
@@ -398,33 +474,27 @@ static int scsifront_queuecommand(struct
 		return SCSI_MLQUEUE_HOST_BUSY;
 	}
 
+	if (info->active.sc && !push_cmd_to_ring(info, NULL)) {
+		scsifront_do_request(info);
+		scsifront_return(info);
+		spin_unlock_irqrestore(shost->host_lock, flags);
+		return SCSI_MLQUEUE_HOST_BUSY;
+	}
+
 	sc->result    = 0;
 
 	ring_req          = scsifront_pre_request(info);
 	rqid              = ring_req->rqid;
-	ring_req->act     = VSCSIIF_ACT_SCSI_CDB;
-
-	ring_req->id      = sc->device->id;
-	ring_req->lun     = sc->device->lun;
-	ring_req->channel = sc->device->channel;
-	ring_req->cmd_len = sc->cmd_len;
 
 	BUG_ON(sc->cmd_len > VSCSIIF_MAX_COMMAND_SIZE);
 
-	if ( sc->cmd_len )
-		memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
-	else
-		memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);
-
-	ring_req->sc_data_direction   = (uint8_t)sc->sc_data_direction;
-	ring_req->timeout_per_command = (sc->request->timeout / HZ);
-
 	info->shadow[rqid].sc  = sc;
 	info->shadow[rqid].act = VSCSIIF_ACT_SCSI_CDB;
 
-	ref_cnt = map_data_for_request(info, sc, ring_req, rqid);
+	ref_cnt = map_data_for_request(info, sc, &info->shadow[rqid]);
 	if (ref_cnt < 0) {
 		add_id_to_freelist(info, rqid);
+		scsifront_do_request(info);
 		scsifront_return(info);
 		spin_unlock_irqrestore(shost->host_lock, flags);
 		if (ref_cnt == (-ENOMEM))
@@ -434,9 +504,13 @@ static int scsifront_queuecommand(struct
 		return 0;
 	}
 
-	ring_req->nr_segments          = (uint8_t)ref_cnt;
 	info->shadow[rqid].nr_segments = ref_cnt;
 
+	info->active.sc  = sc;
+	info->active.rqid = rqid;
+	info->active.done = 0;
+	push_cmd_to_ring(info, ring_req);
+
 	scsifront_do_request(info);
 
 	scsifront_return(info);
--- a/drivers/xen/scsifront/xenbus.c
+++ b/drivers/xen/scsifront/xenbus.c
@@ -42,6 +42,10 @@
   #define DEFAULT_TASK_COMM_LEN	TASK_COMM_LEN
 #endif
 
+static unsigned int max_nr_segs = VSCSIIF_SG_TABLESIZE;
+module_param_named(max_segs, max_nr_segs, uint, 0);
+MODULE_PARM_DESC(max_segs, "Maximum number of segments per request");
+
 extern struct scsi_host_template scsifront_sht;
 
 static void scsifront_free_ring(struct vscsifrnt_info *info)
@@ -185,7 +189,9 @@ static int scsifront_probe(struct xenbus
 	int i, err = -ENOMEM;
 	char name[DEFAULT_TASK_COMM_LEN];
 
-	host = scsi_host_alloc(&scsifront_sht, sizeof(*info));
+	host = scsi_host_alloc(&scsifront_sht,
+			       offsetof(struct vscsifrnt_info,
+					active.segs[max_nr_segs]));
 	if (!host) {
 		xenbus_dev_fatal(dev, err, "fail to allocate scsi host");
 		return err;
@@ -232,7 +238,7 @@ static int scsifront_probe(struct xenbus
 	host->max_id      = VSCSIIF_MAX_TARGET;
 	host->max_channel = 0;
 	host->max_lun     = VSCSIIF_MAX_LUN;
-	host->max_sectors = (VSCSIIF_SG_TABLESIZE - 1) * PAGE_SIZE / 512;
+	host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512;
 	host->max_cmd_len = VSCSIIF_MAX_COMMAND_SIZE;
 
 	err = scsi_add_host(host, &dev->dev);
@@ -347,6 +353,40 @@ static int scsifront_disconnect(struct v
 	return 0;
 }
 
+/*
+ * Read the backend's "segs-per-req" node, falling back to
+ * VSCSIIF_SG_TABLESIZE when it cannot be parsed.  Outside of a pause a
+ * larger value raises the host's SG limit (capped by max_nr_segs) and
+ * max_sectors with it; during a pause a smaller value only triggers a
+ * warning.
+ */
+static void scsifront_read_backend_params(struct xenbus_device *dev,
+					  struct vscsifrnt_info *info)
+{
+	struct Scsi_Host *shost = info->host;
+	unsigned int backend_segs;
+
+	if (xenbus_scanf(XBT_NIL, dev->otherend, "segs-per-req", "%u",
+			 &backend_segs) != 1)
+		backend_segs = VSCSIIF_SG_TABLESIZE;
+
+	if (info->pause) {
+		if (backend_segs < shost->sg_tablesize)
+			dev_warn(&dev->dev,
+				 "SG entries decreased from %d to %u - device may not work properly anymore\n",
+				 shost->sg_tablesize, backend_segs);
+		return;
+	}
+
+	if (backend_segs <= shost->sg_tablesize)
+		return;
+
+	shost->sg_tablesize = min(backend_segs, max_nr_segs);
+	dev_info(&dev->dev, "using up to %d SG entries\n",
+		 shost->sg_tablesize);
+	shost->max_sectors = (shost->sg_tablesize - 1) * PAGE_SIZE / 512;
+}
+
 #define VSCSIFRONT_OP_ADD_LUN	1
 #define VSCSIFRONT_OP_DEL_LUN	2
 #define VSCSIFRONT_OP_READD_LUN	3
@@ -443,6 +471,7 @@ static void scsifront_backend_changed(st
 		break;
 
 	case XenbusStateConnected:
+		scsifront_read_backend_params(dev, info);
 		if (info->pause) {
 			scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_READD_LUN);
 			xenbus_switch_state(dev, XenbusStateConnected);
@@ -499,6 +528,13 @@ static DEFINE_XENBUS_DRIVER(scsifront, ,
 
 int __init scsifront_xenbus_init(void)
 {
+	/*
+	 * Sanitize the "max_segs" module parameter: cap it at SG_ALL first,
+	 * then raise it to at least VSCSIIF_SG_TABLESIZE.
+	 */
+	max_nr_segs = min_t(unsigned int, max_nr_segs, SG_ALL);
+	max_nr_segs = max_t(unsigned int, max_nr_segs, VSCSIIF_SG_TABLESIZE);
+
 	return xenbus_register_frontend(&scsifront_driver);
 }
 
