From: Olaf Hering <ohering@suse.de>
Date: Wed, 21 Aug 2013 20:23:06 +0200
Subject: xen: pv-on-hvm: skip initialization of emulated devices
References: fate#311487, bnc#922309
Patch-mainline: n/a

Remove the need for special modprobe.conf rules in xen-kmp.  With this
change the modprobe.conf rule is not needed anymore because all the
native drivers for emulated hardware will skip their initialization in
PV-on-HVM guests.

The rules in xen-kmp forced the usage of pv drivers in a PV-on-HVM guest.
Emulated hardware will be shut down by the pv core driver
(xen-platform-pci.ko) and the pv drivers take ownership.

Four ways exist to drive virtual hardware in the guest:
* in the guest config file on the host "xen_platform_pci=1" will provide
  a Xen platform PCI device as a vehicle to make use of
  paravirtualisation features. This is the default in the xen toolstack.
  This patch adds a quirk for this device to disable native drivers for
  emulated network hardware, and to disable access to disk devices in
  ata_piix. CDROM devices are still handled by ata_piix.
* booting the guest kernel with "xen_emul_unplug=never" to let the guest
  use emulated hardware without paravirtualisation features. The cmdline
  option is used by the pvops kernel. This is useful for testing, not
  supported by us. Our xen-kmp needs a patch to recognize this option.
* booting the guest kernel with "xen_emul_unplug=ide-disks|nics" to let
  the guest use selected emulated hardware. Paravirtualized drivers will
  be disabled for the selected emulated hardware type.
* disable the Xen platform PCI device on the host with
  "xen_platform_pci=0". This will require native drivers for emulated
  hardware in the guest. Useful for testing and not supported by us.

Virtual disks will be provided in three ways to the guest:
vdev=hda - provides the disk via emulated piix IDE controller
vdev=sda - provides the disk via emulated LSI SCSI controller
vdev=xvda - provides the disk only via pv-drivers

At least one IDE disk is required to boot.
SCSI disks do not exist for two reasons: they were shut down by the old
unplug protocol, and the sd_mod driver is built-in so xen:vbd can not
claim the sd major.

With xen_emul_unplug=never the xen:vbd driver will skip IDE disks and
provide only xvd* disks.

Since this patch is for the forward-ported xenlinux sources it is not
targeted for mainline kernel.

unmodified_drivers/linux-2.6/platform-pci/platform-pci.c from xen-kmp
has to include a new header to get the unplug defines. To allow
compilation of xen-kmp with older kernel-source a guard has to be
provided to skip inclusion of the new header. An easy place was
include/xen/features.h, but any header included by platform-pci.c would
do.

Signed-off-by: Olaf Hering <ohering@suse.de>

--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -94,6 +94,7 @@
 #include <scsi/scsi_host.h>
 #include <linux/libata.h>
 #include <linux/dmi.h>
+#include <xen/xen_pvonhvm.h>
 
 #define DRV_NAME	"ata_piix"
 #define DRV_VERSION	"2.13"
@@ -1645,6 +1646,8 @@ static void piix_ignore_devices_quirk(st
 			dmi->ident);
 	}
 #endif
+	if (xen_pvonhvm_unplugged_disks)
+		host->flags |= ATA_HOST_IGNORE_ATA;
 }
 
 /**
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -76,6 +76,7 @@
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/uaccess.h>
+#include <xen/xen_pvonhvm.h>
 
 /* VLAN tagging feature enable/disable */
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
@@ -2083,6 +2084,9 @@ static struct pci_driver cp_driver = {
 
 static int __init cp_init (void)
 {
+	if (xen_pvonhvm_unplugged_nics)
+		return -EBUSY;
+
 #ifdef MODULE
 	pr_info("%s", version);
 #endif
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -112,6 +112,7 @@
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
 #include <asm/irq.h>
+#include <xen/xen_pvonhvm.h>
 
 #define RTL8139_DRIVER_NAME   DRV_NAME " Fast Ethernet driver " DRV_VERSION
 
@@ -2600,6 +2601,9 @@ static struct pci_driver rtl8139_pci_dri
 
 static int __init rtl8139_init_module (void)
 {
+	if (xen_pvonhvm_unplugged_nics)
+		return -EBUSY;
+
 	/* when we're a module, we always print a version message,
 	 * even if no 8139 board is found.
 	 */
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c
@@ -172,6 +172,7 @@
 #include <linux/firmware.h>
 #include <linux/rtnetlink.h>
 #include <asm/unaligned.h>
+#include <xen/xen_pvonhvm.h>
 
 
 #define DRV_NAME		"e100"
@@ -3144,6 +3145,9 @@ static struct pci_driver e100_driver = {
 
 static int __init e100_init_module(void)
 {
+	if (xen_pvonhvm_unplugged_nics)
+		return -EBUSY;
+
 	if (((1 << debug) - 1) & NETIF_MSG_DRV) {
 		pr_info("%s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
 		pr_info("%s\n", DRV_COPYRIGHT);
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -32,6 +32,7 @@
 #include <linux/prefetch.h>
 #include <linux/bitops.h>
 #include <linux/if_vlan.h>
+#include <xen/xen_pvonhvm.h>
 
 char e1000_driver_name[] = "e1000";
 static char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver";
@@ -245,6 +246,10 @@ struct net_device *e1000_get_hw_dev(stru
 static int __init e1000_init_module(void)
 {
 	int ret;
+
+	if (xen_pvonhvm_unplugged_nics)
+		return -EBUSY;
+
 	pr_info("%s - version %s\n", e1000_driver_string, e1000_driver_version);
 
 	pr_info("%s\n", e1000_copyright);
--- a/drivers/net/ne2k-pci.c
+++ b/drivers/net/ne2k-pci.c
@@ -58,6 +58,7 @@ static int options[MAX_UNITS];
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/uaccess.h>
+#include <xen/xen_pvonhvm.h>
 
 #include "8390.h"
 
@@ -709,6 +710,9 @@ static struct pci_driver ne2k_driver = {
 
 static int __init ne2k_pci_init(void)
 {
+	if (xen_pvonhvm_unplugged_nics)
+		return -EBUSY;
+
 /* when a module, this is printed whether or not devices are found in probe */
 #ifdef MODULE
 	printk(version);
--- a/drivers/net/pcnet32.c
+++ b/drivers/net/pcnet32.c
@@ -57,6 +57,7 @@ static const char *const version =
 
 #include <asm/dma.h>
 #include <asm/irq.h>
+#include <xen/xen_pvonhvm.h>
 
 /*
  * PCI device identifiers for "new style" Linux PCI Device Drivers
@@ -2892,6 +2893,9 @@ MODULE_LICENSE("GPL");
 
 static int __init pcnet32_init_module(void)
 {
+	if (xen_pvonhvm_unplugged_nics)
+		return -EBUSY;
+
 	pr_info("%s", version);
 
 	pcnet32_debug = netif_msg_init(debug, PCNET32_MSG_DEFAULT);
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -28,6 +28,56 @@
 #include <asm/dma.h>	/* isa_dma_bridge_buggy */
 #include "pci.h"
 
+#ifndef CONFIG_XEN
+#include <xen/xen_pvonhvm.h>
+/*
+ * Disable native drivers for emulated hardware until xen-platform-pci.ko
+ * from xen-kmp loads. It is not predictable which modular or built-in driver
+ * is initialized before xen-platform-pci is loaded. The default is to skip
+ * init of native drivers unless booted with xen_emul_unplug=options.
+ */
+int xen_pvonhvm_unplug;
+EXPORT_SYMBOL_GPL(xen_pvonhvm_unplug);
+
+/* Parse xen_emul_unplug=: comma-separated all, ide-disks, nics, never. */
+static int __init parse_xen_emul_unplug(char *arg)
+{
+	char *p, *q;
+	size_t l;
+
+	for (p = arg; p; p = q) {
+		q = strchr(p, ',');
+		l = q ? (size_t)(q - p) : strlen(p);
+		if (q)
+			q++;
+
+		/* Whole-token match: strncmp(p, s, l) alone accepts "" and prefixes. */
+		if (l == strlen("all") && !strncmp(p, "all", l))
+			xen_pvonhvm_unplug |= XEN_PVONHVM_UNPLUG_ALL;
+		else if (l == strlen("ide-disks") && !strncmp(p, "ide-disks", l))
+			xen_pvonhvm_unplug |= XEN_PVONHVM_UNPLUG_IDE_DISKS;
+		else if (l == strlen("nics") && !strncmp(p, "nics", l))
+			xen_pvonhvm_unplug |= XEN_PVONHVM_UNPLUG_NICS;
+		else if (l == strlen("never") && !strncmp(p, "never", l))
+			xen_pvonhvm_unplug = XEN_PVONHVM_UNPLUG_NEVER;
+		else	/* %.*s prints just this token, not the rest of the list */
+			printk(KERN_WARNING "unrecognised option '%.*s' "
+				 "in parameter 'xen_emul_unplug'\n", (int)l, p);
+	}
+	return 0;
+}
+__setup("xen_emul_unplug=", parse_xen_emul_unplug);
+
+/* Default to unplug everything, unless xen_emul_unplug= already set a value. */
+static void quirk_xen_pvonhvm(struct pci_dev *dev)
+{
+	if (xen_pvonhvm_unplug)
+		return;
+	xen_pvonhvm_unplug = XEN_PVONHVM_UNPLUG_ALL;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XEN, PCI_DEVICE_ID_XEN_PLATFORM, quirk_xen_pvonhvm);
+#endif
+
 /*
  * This quirk function disables memory decoding and releases memory resources
  * of the device specified by kernel's boot parameter 'pci=resource_alignment='.
--- a/drivers/xen/blkfront/vbd.c
+++ b/drivers/xen/blkfront/vbd.c
@@ -36,6 +36,7 @@
 #include <linux/bitmap.h>
 #include <linux/blkdev.h>
 #include <linux/list.h>
+#include <xen/xen_pvonhvm.h>
 
 #ifdef HAVE_XEN_PLATFORM_COMPAT_H
 #include <xen/platform-compat.h>
@@ -399,6 +400,37 @@ xlvbd_init_blk_queue(struct gendisk *gd,
 	return 0;
 }
 
+/*
+ * May blkfront claim a disk with this major?
+ *
+ * Returns false only for an IDE major while the emulated disks were not
+ * unplugged, because the native driver is required for those. Non-IDE
+ * majors are always accepted. Always returns true with CONFIG_XEN.
+ */
+static inline bool
+xlvbd_ide_unplugged(int major)
+{
+#ifndef CONFIG_XEN
+	bool ide_major;
+
+	/* xen_emul_unplug=ide-disks|all: the pv driver is required. */
+	if (xen_pvonhvm_unplugged_disks)
+		return true;
+
+	/* xen_emul_unplug=nics|never: IDE majors need the native driver. */
+	ide_major = major == IDE0_MAJOR || major == IDE1_MAJOR ||
+		    major == IDE2_MAJOR || major == IDE3_MAJOR ||
+		    major == IDE4_MAJOR || major == IDE5_MAJOR ||
+		    major == IDE6_MAJOR || major == IDE7_MAJOR ||
+		    major == IDE8_MAJOR || major == IDE9_MAJOR;
+
+	if (ide_major)
+		return false;
+
+#endif
+	return true;
+}
+
 int
 xlvbd_add(blkif_sector_t capacity, int vdevice, unsigned int vdisk_info,
 	  unsigned int sector_size, unsigned int physical_sector_size,
@@ -432,6 +464,10 @@ xlvbd_add(blkif_sector_t capacity, int v
 			return -ENODEV;
 		}
 	}
+	if (!xlvbd_ide_unplugged(major)) {
+		pr_warning("blkfront: skipping IDE major on %x, native driver required\n", vdevice);
+		return -ENODEV;
+	}
 
 	BUG_ON(info->gd != NULL);
 	BUG_ON(info->mi != NULL);
--- a/drivers/xen/netfront/netfront.c
+++ b/drivers/xen/netfront/netfront.c
@@ -66,6 +66,7 @@
 #include <xen/interface/grant_table.h>
 #include <xen/gnttab.h>
 #include <xen/net-util.h>
+#include <xen/xen_pvonhvm.h>
 
 struct netfront_cb {
 	struct page *page;
@@ -254,6 +255,35 @@ static void __devinit netfront_enable_ar
 #endif
 }
 
+/*
+ * May netfront drive this interface? Returns false when the interface is
+ * emulated and was not unplugged, so that the native driver handles it;
+ * always true with CONFIG_XEN.
+ */
+static bool netfront_nic_unplugged(struct xenbus_device *dev)
+{
+#ifndef CONFIG_XEN
+	char *type;
+	bool emulated;
+
+	/* xen_emul_unplug=nics|all: emulated hardware was unplugged, use pv. */
+	if (xen_pvonhvm_unplugged_nics)
+		return true;
+
+	/* xen_emul_unplug=ide-disks|never: check "type" under dev->otherend. */
+	type = xenbus_read(XBT_NIL, dev->otherend, "type", NULL);
+	if (IS_ERR(type))
+		return false;	/* missing: assume emulated+pv (ioemu+vif) */
+
+	emulated = !strcmp(type, "vif_ioemu") || !strcmp(type, "ioemu");
+	kfree(type);
+
+	if (emulated)
+		return false;
+#endif
+	return true;
+}
+
 /**
  * Entry point to this code when a new device is created.  Allocate the basic
  * structures and the ring buffers for communication with the backend, and
@@ -266,6 +296,11 @@ static int __devinit netfront_probe(stru
 	struct net_device *netdev;
 	struct netfront_info *info;
 
+	if (!netfront_nic_unplugged(dev)) {
+		pr_warning("netfront: skipping emulated interface, native driver required\n");
+		return -ENODEV;
+	}
+
 	netdev = create_netdev(dev);
 	if (IS_ERR(netdev)) {
 		err = PTR_ERR(netdev);
--- a/include/xen/features.h
+++ b/include/xen/features.h
@@ -21,4 +21,15 @@ static inline int xen_feature(int flag)
 	return xen_features[flag];
 }
 
+/*
+ * unmodified_drivers/linux-2.6/platform-pci/platform-pci.c from xen-kmp has to
+ * include a new header to get the unplug defines from xen/xen_pvonhvm.h.  To
+ * allow compilation of xen-kmp with older kernel-source a guard has to be
+ * provided to skip inclusion of the new header.  An easy place was
+ * xen/features.h, but any header included by platform-pci.c would do.
+ */
+#ifndef CONFIG_XEN
+#define HAVE_XEN_PVONHVM_UNPLUG 1
+#endif
+
 #endif /* __XEN_FEATURES_H__ */
--- /dev/null
+++ b/include/xen/xen_pvonhvm.h
@@ -0,0 +1,17 @@
+#ifndef _XEN_PVONHVM_H
+#define _XEN_PVONHVM_H
+
+#ifdef CONFIG_XEN
+#define xen_pvonhvm_unplug 0	/* constant 0: both xen_pvonhvm_unplugged_* tests are false */
+#else
+extern int xen_pvonhvm_unplug;	/* bitmask of XEN_PVONHVM_UNPLUG_* flags; defined in drivers/pci/quirks.c */
+#endif
+
+#define XEN_PVONHVM_UNPLUG_IDE_DISKS (1 << 1)	/* xen_emul_unplug=ide-disks */
+#define XEN_PVONHVM_UNPLUG_NICS      (1 << 2)	/* xen_emul_unplug=nics */
+#define XEN_PVONHVM_UNPLUG_NEVER     (1 << 3)	/* xen_emul_unplug=never: non-zero, but neither bit tested below */
+#define XEN_PVONHVM_UNPLUG_ALL       (XEN_PVONHVM_UNPLUG_IDE_DISKS | XEN_PVONHVM_UNPLUG_NICS)
+#define xen_pvonhvm_unplugged_disks  (xen_pvonhvm_unplug & XEN_PVONHVM_UNPLUG_IDE_DISKS)
+#define xen_pvonhvm_unplugged_nics   (xen_pvonhvm_unplug & XEN_PVONHVM_UNPLUG_NICS)
+
+#endif /* _XEN_PVONHVM_H */
