From: Dan Magenheimer <dan.magenheimer@oracle.com>
Subject: [PATCH] xen: prepare tmem shim to handle frontswap
Patch-mainline: tbd

Provide the shim code for frontswap even if the frontswap patchset
is not present yet.

Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Acked-by: jbeulich@novell.com

--- sle11sp3.orig/drivers/xen/Kconfig	2011-11-04 15:02:57.000000000 +0100
+++ sle11sp3/drivers/xen/Kconfig	2012-10-19 15:11:06.000000000 +0200
@@ -519,6 +519,13 @@ config SWIOTLB_XEN
 	depends on PARAVIRT_XEN && PCI
 	select SWIOTLB
 
+config XEN_TMEM
+	bool
+	default y if (CLEANCACHE || FRONTSWAP)
+	help
+	  Shim to interface in-kernel Transcendent Memory hooks
+	  (e.g. cleancache and frontswap) to Xen tmem hypercalls.
+
 config XEN_XENCOMM
 	bool
 
--- sle11sp3.orig/drivers/xen/Makefile	2011-07-01 16:01:23.000000000 +0200
+++ sle11sp3/drivers/xen/Makefile	2012-10-19 15:11:12.000000000 +0200
@@ -8,7 +8,6 @@ xen-balloon_$(CONFIG_XEN)	:= balloon/
 obj-$(CONFIG_XEN)		+= core/
 obj-$(CONFIG_XEN)		+= console/
 obj-y				+= xenbus/
-obj-y				+= tmem.o
 obj-$(CONFIG_XEN)		+= char/
 
 xen-backend-$(CONFIG_XEN_BACKEND)	:= util.o
@@ -34,6 +33,7 @@ obj-$(CONFIG_XENFS)			+= xenfs/
 obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
 obj-$(CONFIG_XEN_PLATFORM_PCI)		+= xen-platform-pci.o
+obj-$(CONFIG_XEN_TMEM)			+= tmem.o
 obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
 obj-$(CONFIG_XEN_DOM0)			+= pci.o
 
--- sle11sp3.orig/drivers/xen/tmem.c	2011-07-04 14:40:05.000000000 +0200
+++ sle11sp3/drivers/xen/tmem.c	2011-11-14 11:28:27.000000000 +0100
@@ -1,7 +1,7 @@
 /*
  * Xen implementation for transcendent memory (tmem)
  *
- * Copyright (C) 2009-2010 Oracle Corp.  All rights reserved.
+ * Copyright (C) 2009-2011 Oracle Corp.  All rights reserved.
  * Author: Dan Magenheimer
  */
 
@@ -9,8 +9,14 @@
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
+#include <linux/module.h>
 #include <linux/cleancache.h>
 
+/* temporary ifdef until include/linux/frontswap.h is upstream */
+#ifdef CONFIG_FRONTSWAP
+#include <linux/frontswap.h>
+#endif
+
 #include <xen/xen.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/tmem.h>
@@ -109,14 +115,8 @@ static int xen_tmem_flush_object(u32 poo
 	return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0);
 }
 
-static int xen_tmem_destroy_pool(u32 pool_id)
-{
-	struct tmem_oid oid = { { 0 } };
-
-	return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0);
-}
-
-int tmem_enabled;
+int tmem_enabled __read_mostly;
+EXPORT_SYMBOL(tmem_enabled);
 
 static int __init enable_tmem(char *s)
 {
@@ -127,6 +127,13 @@ static int __init enable_tmem(char *s)
 __setup("tmem", enable_tmem);
 
 #ifdef CONFIG_CLEANCACHE
+/* issue a TMEM_DESTROY_POOL tmem op for pool_id and return its result */
+static int xen_tmem_destroy_pool(u32 pool_id)
+{
+	return xen_tmem_op(TMEM_DESTROY_POOL, pool_id,
+			   (struct tmem_oid){ { 0 } }, 0, 0, 0, 0, 0);
+}
+
 /* cleancache ops */
 
 static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key,
@@ -230,17 +237,152 @@ static struct cleancache_ops tmem_cleanc
 };
 #endif
 
-static int __init xen_tmem_init(void)
+#ifdef CONFIG_FRONTSWAP
+/* frontswap tmem operations */
+
+/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
+static int tmem_frontswap_poolid;
+
+/*
+ * Swizzling increases objects per swaptype, increasing tmem concurrency
+ * for heavy swaploads.  Later, larger nr_cpus -> larger SWIZ_BITS
+ */
+#define SWIZ_BITS		4	/* low index bits folded into the oid */
+#define SWIZ_MASK		((1 << SWIZ_BITS) - 1)
+#define _oswiz(_type, _ind)	(((_type) << SWIZ_BITS) | ((_ind) & SWIZ_MASK))
+#define iswiz(_ind)		((_ind) >> SWIZ_BITS)	/* in-object index */
+
+/* build the tmem object id for (type, ind): only oid[0] is non-zero */
+static inline struct tmem_oid oswiz(unsigned type, u32 ind)
+{
+	/* remaining oid words are zeroed by the designated initializer */
+	return (struct tmem_oid){ .oid = { [0] = _oswiz(type, ind) } };
+}
+
+/*
+ * Store a page in frontswap.  Returns 0 on success and -1 if no pool exists,
+ * the offset does not fit in 32 bits, or the tmem put did not return 1.
+ */
+static int tmem_frontswap_put_page(unsigned type, pgoff_t offset,
+				   struct page *page)
+{
+	const int poolid = tmem_frontswap_poolid;
+	const u32 index = (u32)offset;
+	unsigned long pfn = page_to_pfn(page);
+	int rc;
+
+	/* no pool yet, or offset would be truncated by the 32-bit index */
+	if (poolid < 0 || (u64)offset != index)
+		return -1;
+
+	mb(); /* ensure page is quiescent; tmem may address it with an alias */
+	rc = xen_tmem_put_page(poolid, oswiz(type, index), iswiz(index), pfn);
+
+	/* translate Xen tmem return values to linux semantics */
+	return (rc == 1) ? 0 : -1;
+}
+
+/*
+ * Fetch a page back from frontswap.  Returns 0 on success, -1 if it was
+ * not present (should never happen!), no pool exists or offset is > 32 bits.
+ */
+static int tmem_frontswap_get_page(unsigned type, pgoff_t offset,
+				   struct page *page)
+{
+	const int poolid = tmem_frontswap_poolid;
+	const u32 index = (u32)offset;
+	unsigned long pfn = page_to_pfn(page);
+	int rc;
+
+	if (poolid < 0)
+		return -1;
+	if ((u64)offset != index)	/* offset does not fit in 32 bits */
+		return -1;
+
+	rc = xen_tmem_get_page(poolid, oswiz(type, index), iswiz(index), pfn);
+
+	/* translate Xen tmem return values to linux semantics */
+	return (rc == 1) ? 0 : -1;
+}
+
+/* flush one page from frontswap; a no-op without a pool or a 32-bit offset */
+static void tmem_frontswap_flush_page(unsigned type, pgoff_t offset)
 {
-	struct cleancache_ops old_ops;
+	u64 ind64 = (u64)offset;
+	u32 ind = (u32)offset;
+	int pool = tmem_frontswap_poolid;
 
+	if (pool < 0)		/* no frontswap pool has been created */
+		return;
+	if (ind64 != ind)	/* offset was truncated by the u32 cast */
+		return;
+	(void) xen_tmem_flush_page(pool, oswiz(type, ind), iswiz(ind));
+}
+
+/* flush every tmem object (one per swizzle value) of the passed swaptype */
+static void tmem_frontswap_flush_area(unsigned type)
+{
+	const int poolid = tmem_frontswap_poolid;
+	int swiz;
+
+	if (poolid < 0)
+		return;
+	for (swiz = SWIZ_MASK; swiz >= 0; swiz--)
+		(void)xen_tmem_flush_object(poolid, oswiz(type, swiz));
+}
+
+static void tmem_frontswap_init(unsigned ignored)
+{
+	struct tmem_pool_uuid uuid = TMEM_POOL_PRIVATE_UUID;
+
+	if (tmem_frontswap_poolid >= 0)
+		return;	/* one pool serves all frontswap "types" (swapfiles) */
+	tmem_frontswap_poolid =
+		xen_tmem_new_pool(uuid, TMEM_POOL_PERSIST, PAGE_SIZE);
+}
+
+static int use_frontswap = 1;	/* cleared by the "nofrontswap" boot option */
+
+static int __init no_frontswap(char *s)
+{
+	use_frontswap = 0;	/* xen_tmem_init() then skips frontswap setup */
+	return 1;
+}
+
+__setup("nofrontswap", no_frontswap);
+
+static struct frontswap_ops tmem_frontswap_ops = {
+	.init = tmem_frontswap_init,
+	.put_page = tmem_frontswap_put_page,
+	.get_page = tmem_frontswap_get_page,
+	.invalidate_page = tmem_frontswap_flush_page,
+	.invalidate_area = tmem_frontswap_flush_area,
+};
+#endif
+
+static int __init xen_tmem_init(void)
+{
 	if (!xen_domain())
 		return 0;
+#ifdef CONFIG_FRONTSWAP
+	if (tmem_enabled && use_frontswap) {
+		char *s = "";	/* suffix appended to the message below */
+		struct frontswap_ops old_ops =
+			frontswap_register_ops(&tmem_frontswap_ops);
+
+		tmem_frontswap_poolid = -1;	/* real pool is created in .init */
+		if (old_ops.init != NULL)
+			s = " (WARNING: frontswap_ops overridden)";
+		printk(KERN_INFO "frontswap enabled, RAM provided by "
+				 "Xen Transcendent Memory%s\n", s);
+	}
+#endif
 #ifdef CONFIG_CLEANCACHE
 	BUILD_BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid));
 	if (tmem_enabled && use_cleancache) {
 		char *s = "";
-		old_ops = cleancache_register_ops(&tmem_cleancache_ops);
+		struct cleancache_ops old_ops =
+			cleancache_register_ops(&tmem_cleancache_ops);
 		if (old_ops.init_fs != NULL)
 			s = " (WARNING: cleancache_ops overridden)";
 		printk(KERN_INFO "cleancache enabled, RAM provided by "
