From: jbeulich@suse.com
Subject: privcmd: actually batch hypercalls for batch ioctls
Patch-mainline: n/a

Inspired by a functionally similar pv-ops patch from Mats Petersson.

--- sle11sp4.orig/arch/x86/include/mach-xen/asm/pgtable.h	2014-12-10 15:56:15.000000000 +0100
+++ sle11sp4/arch/x86/include/mach-xen/asm/pgtable.h	2013-02-19 15:59:02.000000000 +0100
@@ -896,6 +896,14 @@ int direct_remap_pfn_range(struct vm_are
                            unsigned long size,
                            pgprot_t,
                            domid_t);
+int direct_remap_pfns_range(struct vm_area_struct *, /* remap an MFN list */
+			    unsigned long address,
+			    const unsigned long *mfns, /* one entry per page */
+			    unsigned long size, /* in bytes */
+			    pgprot_t,
+			    domid_t,
+			    void (*)(unsigned int idx, int rc, void *), /* per-page result */
+			    void *ctxt); /* passed through to the callback */
 int direct_kernel_remap_pfn_range(unsigned long address,
 				  unsigned long mfn,
 				  unsigned long size,
--- sle11sp4.orig/arch/x86/mm/ioremap-xen.c	2014-10-31 13:48:09.000000000 +0100
+++ sle11sp4/arch/x86/mm/ioremap-xen.c	2013-02-05 17:21:28.000000000 +0100
@@ -44,60 +44,79 @@ static int direct_remap_area_pte_fn(pte_
 static int __direct_remap_pfn_range(struct mm_struct *mm,
 				    unsigned long address,
 				    unsigned long mfn,
+				    const unsigned long *mfns,
 				    unsigned long size,
 				    pgprot_t prot,
-				    domid_t  domid)
+				    domid_t  domid,
+				    void (*cb)(unsigned int idx, int rc,
+					       void *),
+				    void *ctxt)
 {
 	int rc = 0;
-	unsigned long i, start_address;
-	mmu_update_t *u, *v, *w;
+	unsigned int i, idx;
+	mmu_update_t *u, *v;
 
-	u = v = w = (mmu_update_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	u = (mmu_update_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
 	if (u == NULL)
 		return -ENOMEM;
 
-	start_address = address;
 	pgprot_val(prot) |= _PAGE_IOMAP;
 
 	flush_cache_all();
 
-	for (i = 0; i < size; i += PAGE_SIZE) {
-		if ((v - u) == (PAGE_SIZE / sizeof(mmu_update_t))) {
-			/* Flush a full batch after filling in the PTE ptrs. */
-			rc = apply_to_page_range(mm, start_address,
-						 address - start_address,
-						 direct_remap_area_pte_fn, &w);
-			if (rc)
-				goto out;
-			rc = HYPERVISOR_mmu_update(u, v - u, NULL, domid);
-			if (rc < 0)
-				goto out;
-			v = w = u;
-			start_address = address;
-		}
-
+	for (i = idx = 0, v = u; ; ) {
 		/*
 		 * Fill in the machine address: PTE ptr is done later by
 		 * apply_to_page_range().
 		 */
 		v->val = __pte_val(pte_mkspecial(pfn_pte_ma(mfn, prot)));
 
-		mfn++;
-		address += PAGE_SIZE;
+		i += PAGE_SIZE;
 		v++;
-	}
 
-	if (v != u) {
-		/* Final batch. */
-		rc = apply_to_page_range(mm, start_address,
-					 address - start_address,
-					 direct_remap_area_pte_fn, &w);
-		if (rc)
-			goto out;
-		rc = HYPERVISOR_mmu_update(u, v - u, NULL, domid);
+		if (i >= size || v - u == PAGE_SIZE / sizeof(mmu_update_t)) {
+			mmu_update_t *w = u;
+			unsigned int nr = v - u;
+
+			/* Flush a full or final batch once PTE ptrs are set. */
+			rc = apply_to_page_range(mm, address, i,
+						 direct_remap_area_pte_fn, &w);
+			if (rc)
+				break;
+
+			v = u;
+			do {
+				unsigned int j, done;
+
+				rc = HYPERVISOR_mmu_update(v, nr, &done,
+							   domid);
+				if (!cb)
+					break;
+				for (j = 0; j < done; ++j)
+					cb(idx++, 0, ctxt);
+				if (!rc)
+					break;
+				cb(idx++, rc, ctxt);
+				rc = 0;
+				v += done + 1;
+				nr -= done + 1;
+			} while (nr);
+
+			if (rc || i >= size)
+				break;
+
+			size -= i;
+			address += i;
+			i = 0;
+			v = u;
+		}
+
+		if (mfns)
+			mfn = *++mfns;
+		else
+			++mfn;
 	}
 
- out:
 	flush_tlb_all();
 
 	free_page((unsigned long)u);
@@ -122,8 +141,8 @@ int direct_remap_pfn_range(struct vm_are
 
 	vma->vm_mm->context.has_foreign_mappings = 1;
 
-	return __direct_remap_pfn_range(
-		vma->vm_mm, address, mfn, size, prot, domid);
+	return __direct_remap_pfn_range(vma->vm_mm, address, mfn, NULL, size,
+					prot, domid, NULL, NULL);
 }
 EXPORT_SYMBOL(direct_remap_pfn_range);
 
@@ -133,11 +152,41 @@ int direct_kernel_remap_pfn_range(unsign
 				  pgprot_t prot,
 				  domid_t  domid)
 {
-	return __direct_remap_pfn_range(
-		&init_mm, address, mfn, size, prot, domid);
+	return __direct_remap_pfn_range(&init_mm, address, mfn, NULL, size,
+					prot, domid, NULL, NULL);
 }
 EXPORT_SYMBOL(direct_kernel_remap_pfn_range);
 
+int direct_remap_pfns_range(struct vm_area_struct *vma, /* map an MFN list */
+			    unsigned long address,
+			    const unsigned long *mfns, /* one entry per page */
+			    unsigned long size, /* bytes to map */
+			    pgprot_t prot,
+			    domid_t  domid,
+			    void (*cb)(unsigned int idx, int rc, void *),
+			    void *ctxt) /* handed to cb unchanged */
+{
+	if (xen_feature(XENFEAT_auto_translated_physmap)) {
+		unsigned long offs;
+
+		for (offs = 0; offs < size; offs += PAGE_SIZE, ++mfns) {
+			int rc = remap_pfn_range(vma, address + offs, *mfns,
+						 PAGE_SIZE, prot);
+			cb(offs >> PAGE_SHIFT, rc, ctxt); /* unconditional: cb must not be NULL */
+		}
+		return 0; /* per-page results are reported through cb only */
+	}
+
+	if (domid == DOMID_SELF) /* this path refuses DOMID_SELF */
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+	vma->vm_mm->context.has_foreign_mappings = 1;
+
+	return __direct_remap_pfn_range(vma->vm_mm, address, *mfns, mfns,
+					size, prot, domid, cb, ctxt);
+}
+
 static int lookup_pte_fn(
 	pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
 {
@@ -339,7 +388,7 @@ static void __iomem *__ioremap_caller(re
 		goto err_free_area;
 
 	if (__direct_remap_pfn_range(&init_mm, vaddr, PFN_DOWN(phys_addr),
-				     size, prot, domid))
+				     NULL, size, prot, domid, NULL, NULL))
 		goto err_free_area;
 
 	ret_addr = (void __iomem *) (vaddr + offset);
--- sle11sp4.orig/drivers/xen/privcmd/privcmd.c	2014-01-16 10:01:23.000000000 +0100
+++ sle11sp4/drivers/xen/privcmd/privcmd.c	2014-11-11 14:28:51.000000000 +0100
@@ -58,13 +58,36 @@ static inline int enforce_singleshot_map
 	privcmd_enforce_singleshot_mapping(vma)
 #endif
 
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+struct mmapbatch_ctxt {
+	int *err; /* per-page result array; NULL means flag failures in mfn[] */
+	xen_pfn_t *mfn; /* frame numbers of the chunk currently being mapped */
+	int status; /* summary: -ENOENT wins, otherwise the first non-zero rc */
+};
+
+static void mmapbatch_cb(unsigned int idx, int rc, void *data) /* record one page's result */
+{
+	struct mmapbatch_ctxt *ctxt = data;
+
+	if (ctxt->err)
+		ctxt->err[idx] = rc; /* every result is stored, success included */
+	else if (rc == -ENOENT)
+		ctxt->mfn[idx] |= 0x80000000U; /* marker bits for -ENOENT */
+	else if (rc)
+		ctxt->mfn[idx] |= 0xf0000000U; /* marker bits for any other error */
+
+	if (rc == -ENOENT || !ctxt->status) /* -ENOENT overrides; else keep first rc */
+		ctxt->status = rc;
+}
+#endif
+
 static long privcmd_ioctl(struct file *file,
 			  unsigned int cmd, unsigned long data)
 {
 	long ret;
 	void __user *udata = (void __user *) data;
 	unsigned long i, addr, nr, nr_pages;
-	int paged_out;
+	struct mmapbatch_ctxt mbc;
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	LIST_HEAD(pagelist);
@@ -218,10 +241,9 @@ static long privcmd_ioctl(struct file *f
 
 	case IOCTL_PRIVCMD_MMAPBATCH: {
 #define MMAPBATCH_NR_PER_PAGE \
-	(unsigned long)((PAGE_SIZE - sizeof(*l)) / sizeof(*mfn))
+	(unsigned long)((PAGE_SIZE - sizeof(*l)) / sizeof(*mbc.mfn))
 		privcmd_mmapbatch_t m;
 		xen_pfn_t __user *p;
-		xen_pfn_t *mfn;
 
 		if (!is_initial_xendomain())
 			return -EPERM;
@@ -250,9 +272,9 @@ static long privcmd_ioctl(struct file *f
 			INIT_LIST_HEAD(l);
 			list_add_tail(l, &pagelist);
 
-			mfn = (unsigned long*)(l + 1);
+			mbc.mfn = (void *)(l + 1);
 			ret = -EFAULT;
-			if (copy_from_user(mfn, p, nr*sizeof(*mfn)))
+			if (copy_from_user(mbc.mfn, p, nr * sizeof(*mbc.mfn)))
 				goto mmapbatch_out;
 
 			i += nr; p+= nr;
@@ -271,53 +293,44 @@ static long privcmd_ioctl(struct file *f
 		}
 
 		i = 0;
-		ret = 0;
-		paged_out = 0;
+		mbc.status = 0;
+		mbc.err = NULL;
 		list_for_each(l, &pagelist) {
 			if (i)
 				cond_resched();
 
-			nr = i + min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
-			mfn = (unsigned long *)(l + 1);
-
-			while (i<nr) {
-				int rc;
+			nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
+			mbc.mfn = (void *)(l + 1);
 
-				rc = direct_remap_pfn_range(vma, addr & PAGE_MASK,
-				                            *mfn, PAGE_SIZE,
-				                            vma->vm_page_prot, m.dom);
-				if(rc < 0) {
-					if (rc == -ENOENT)
-					{
-						*mfn |= 0x80000000U;
-						paged_out = 1;
-					}
-					else
-						*mfn |= 0xf0000000U;
-					ret++;
-				}
-				mfn++; i++; addr += PAGE_SIZE;
+			ret = direct_remap_pfns_range(vma, addr & PAGE_MASK,
+						      mbc.mfn, nr * PAGE_SIZE,
+						      vma->vm_page_prot, m.dom,
+						      mmapbatch_cb, &mbc);
+			if (ret) {
+				if (ret == -ENOENT)
+					ret = -EFAULT;
+				break;
 			}
+			i += nr;
+			addr += nr * PAGE_SIZE;
 		}
 
 		up_write(&mm->mmap_sem);
-		if (ret > 0) {
+
+		if (!ret && mbc.status) {
 			p = m.arr;
 			i = 0;
-			if (paged_out)
-				ret = -ENOENT;
-			else
-				ret = 0;
 			list_for_each(l, &pagelist) {
 				if (i)
 					cond_resched();
 
 				nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
-				mfn = (unsigned long *)(l + 1);
-				if (copy_to_user(p, mfn, nr*sizeof(*mfn)))
+				if (copy_to_user(p, l + 1, nr * sizeof(*p)))
 					ret = -EFAULT;
 				i += nr; p += nr;
 			}
+			if (mbc.status == -ENOENT)
+				ret = -ENOENT;
 		}
 	mmapbatch_out:
 		i = 0;
@@ -332,8 +345,6 @@ static long privcmd_ioctl(struct file *f
 	case IOCTL_PRIVCMD_MMAPBATCH_V2: {
 		privcmd_mmapbatch_v2_t m;
 		const xen_pfn_t __user *p;
-		xen_pfn_t *mfn;
-		int *err;
 
 		if (!is_initial_xendomain())
 			return -EPERM;
@@ -362,9 +373,9 @@ static long privcmd_ioctl(struct file *f
 			INIT_LIST_HEAD(l);
 			list_add_tail(l, &pagelist);
 
-			mfn = (void *)(l + 1);
+			mbc.mfn = (void *)(l + 1);
 			ret = -EFAULT;
-			if (copy_from_user(mfn, p, nr * sizeof(*mfn)))
+			if (copy_from_user(mbc.mfn, p, nr * sizeof(*mbc.mfn)))
 				goto mmapbatch_v2_out;
 		}
 
@@ -381,51 +392,48 @@ static long privcmd_ioctl(struct file *f
 		}
 
 		i = 0;
-		ret = 0;
-		paged_out = 0;
+		mbc.status = 0;
 		list_for_each(l, &pagelist) {
 			if (i)
 				cond_resched();
 
-			nr = i + min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
-			mfn = (void *)(l + 1);
-			err = (void *)(l + 1);
-			BUILD_BUG_ON(sizeof(*err) > sizeof(*mfn));
-
-			while (i < nr) {
-				int rc;
-
-				rc = direct_remap_pfn_range(vma, addr & PAGE_MASK,
-				                            *mfn, PAGE_SIZE,
-				                            vma->vm_page_prot, m.dom);
-				if (rc < 0) {
-					if (rc == -ENOENT)
-						paged_out = 1;
-					ret++;
-				} else
-					BUG_ON(rc > 0);
-				*err++ = rc;
-				mfn++; i++; addr += PAGE_SIZE;
+			nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
+			mbc.mfn = (void *)(l + 1);
+			mbc.err = (void *)(l + 1);
+			BUILD_BUG_ON(sizeof(*mbc.err) > sizeof(*mbc.mfn));
+
+			ret = direct_remap_pfns_range(vma, addr & PAGE_MASK,
+						      mbc.mfn, nr * PAGE_SIZE,
+						      vma->vm_page_prot, m.dom,
+						      mmapbatch_cb, &mbc);
+			if (ret) {
+				if (ret == -ENOENT)
+					ret = -EFAULT;
+				break;
 			}
+			i += nr;
+			addr += nr * PAGE_SIZE;
 		}
 
 		up_write(&mm->mmap_sem);
 
-		if (ret > 0) {
+		if (ret)
+			;
+		else if (mbc.status) {
 			int __user *p = m.err;
 
-			ret = paged_out ? -ENOENT : 0;
 			i = 0;
 			list_for_each(l, &pagelist) {
 				if (i)
 					cond_resched();
 
 				nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
-				err = (void *)(l + 1);
-				if (copy_to_user(p, err, nr * sizeof(*err)))
+				if (copy_to_user(p, l + 1, nr * sizeof(*p)))
 					ret = -EFAULT;
 				i += nr; p += nr;
 			}
+			if (mbc.status == -ENOENT)
+				ret = -ENOENT;
 		} else if (clear_user(m.err, nr_pages * sizeof(*m.err)))
 			ret = -EFAULT;
 
