From: Dongxiao Xu <dongxiao.xu@intel.com>
Subject: Netback: Multiple tasklets support.
Patch-mainline: Never, SUSE-Xen specific

  Currently netback uses a single pair of tasklets for Tx/Rx data transfer. A
  tasklet can only run on one CPU at a time, and this pair serves all the
  netfronts. It has therefore become a performance bottleneck. This patch
  replaces the current single pair in dom0 with multiple tasklet pairs.

  Assuming that Dom0 has CPUNR VCPUs, we define CPUNR pairs of tasklets
  (CPUNR for Tx, and CPUNR for Rx). Each pair of tasklets serves a specific group
  of netfronts. The global and static variables are also duplicated for each
  group, so that no spinlock is needed to protect them.

Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>

jb: some cleanups
Acked-by: jbeulich@novell.com

--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -49,6 +49,7 @@
 typedef struct netif_st {
 	/* Unique identifier for this interface. */
 	domid_t          domid;
+	unsigned int     group;
 	unsigned int     handle;
 
 	u8               fe_dev_addr[6];
@@ -242,6 +243,8 @@ struct netbk_tx_pending_inuse {
 #define MAX_MFN_ALLOC 64
 
 struct xen_netbk {
+	atomic_t nr_groups;
+
 	struct {
 		pending_ring_idx_t pending_prod, pending_cons;
 		pending_ring_idx_t dealloc_prod, dealloc_cons;
@@ -280,4 +283,8 @@ struct xen_netbk {
 		unsigned long mfn_list[MAX_MFN_ALLOC];
 	} rx;
 };
+
+extern struct xen_netbk *xen_netbk;
+extern unsigned int netbk_nr_groups;
+
 #endif /* __NETIF__BACKEND__COMMON_H__ */
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -56,6 +56,23 @@ module_param_named(queue_length, netbk_q
 
 static void __netif_up(netif_t *netif)
 {
+	unsigned int group = 0;
+	unsigned int min_groups = atomic_read(&xen_netbk[0].nr_groups);
+	unsigned int i;
+
+	/* Find the group which currently serves the fewest interfaces. */
+	for (i = 1; i < netbk_nr_groups; i++) {
+		unsigned int nr_groups = atomic_read(&xen_netbk[i].nr_groups);
+
+		if (nr_groups < min_groups) {
+			group = i;
+			min_groups = nr_groups;
+		}
+	}
+
+	atomic_inc(&xen_netbk[group].nr_groups);
+	netif->group = group;
+
 	if (netif->busted) {
 		netif->busted = 0;
 		enable_irq(netif->irq);
@@ -66,8 +83,13 @@ static void __netif_up(netif_t *netif)
 
 static void __netif_down(netif_t *netif)
 {
+	struct xen_netbk *netbk = xen_netbk + netif->group;
+
 	disable_irq(netif->irq);
 	netif_deschedule_work(netif);
+
+	netif->group = UINT_MAX;
+	atomic_dec(&netbk->nr_groups);
 }
 
 static int net_open(struct net_device *dev)
@@ -199,6 +221,7 @@ netif_t *netif_alloc(struct device *pare
 
 	netif = netdev_priv(dev);
 	netif->domid  = domid;
+	netif->group = UINT_MAX;
 	netif->handle = handle;
 	netif->can_sg = 1;
 	netif->csum = 1;
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -47,10 +47,10 @@
 
 /*define NETBE_DEBUG_INTERRUPT*/
 
-static struct xen_netbk *__read_mostly xen_netbk;
-static const unsigned int netbk_nr_groups = 1;
+struct xen_netbk *__read_mostly xen_netbk;
+unsigned int __read_mostly netbk_nr_groups;
 
-#define GET_GROUP_INDEX(netif) (0)
+#define GET_GROUP_INDEX(netif) ((netif)->group)
 
 struct netbk_rx_cb {
 	unsigned int nr_frags;
@@ -168,6 +168,8 @@ MODULE_PARM_DESC(copy_skb, "Copy data re
 static bool MODPARM_permute_returns;
 module_param_named(permute_returns, MODPARM_permute_returns, bool, S_IRUSR|S_IWUSR);
 MODULE_PARM_DESC(permute_returns, "Randomly permute the order in which TX responses are sent to the frontend");
+module_param_named(groups, netbk_nr_groups, uint, 0);
+MODULE_PARM_DESC(groups, "Specify the number of tasklet pairs to use");
 
 int netbk_copy_skb_mode;
 
@@ -357,10 +359,16 @@ static unsigned int netbk_count_slots(co
 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	netif_t *netif = netdev_priv(dev);
+	unsigned int group = GET_GROUP_INDEX(netif);
 	struct xen_netbk_rx *netbk;
 
 	BUG_ON(skb->dev != dev);
 
+	if (unlikely(group >= netbk_nr_groups)) {
+		BUG_ON(group != UINT_MAX);
+		goto drop;
+	}
+
 	/* Drop the packet if the target domain has no receive buffers. */
 	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
 		goto drop;
@@ -409,7 +417,7 @@ int netif_be_start_xmit(struct sk_buff *
 		}
 	}
 
-	netbk = &xen_netbk[GET_GROUP_INDEX(netif)].rx;
+	netbk = &xen_netbk[group].rx;
 	skb_queue_tail(&netbk->queue, skb);
 	tasklet_schedule(&netbk->tasklet);
 
@@ -480,8 +488,11 @@ static void netbk_gop_frag(netif_t *neti
 		netif_get_page_ext(page, netbk, idx);
 		if (netbk) {
 			struct pending_tx_info *src_pend;
+			unsigned int grp;
 
 			src_pend = &netbk->tx.pending_info[idx];
+			grp = GET_GROUP_INDEX(src_pend->netif);
+			BUG_ON(netbk != &xen_netbk[grp] && grp != UINT_MAX);
 			copy_gop->source.domid = src_pend->netif->domid;
 			copy_gop->source.u.ref = src_pend->req.gref;
 			copy_gop->flags |= GNTCOPY_source_gref;
@@ -1052,7 +1063,7 @@ static inline void net_tx_action_dealloc
 		/* Ensure we see all indices enqueued by netif_idx_release(). */
 		smp_rmb();
 
-		if (MODPARM_permute_returns)
+		if (MODPARM_permute_returns && netbk_nr_groups == 1)
 			permute_dealloc_ring(netbk->tx.dealloc_ring, dc, dp);
 
 		while (dc != dp) {
@@ -1850,6 +1861,17 @@ static void netif_page_release(struct pa
 irqreturn_t netif_be_int(int irq, void *dev_id)
 {
 	netif_t *netif = dev_id;
+	unsigned int group = GET_GROUP_INDEX(netif);
+
+	if (unlikely(group >= netbk_nr_groups)) {
+		/*
+		 * Short of having a way to bind the IRQ in disabled mode
+		 * (IRQ_NOAUTOEN), we have to ignore the first invocation(s)
+		 * (before we got assigned to a group).
+		 */
+		BUG_ON(group != UINT_MAX);
+		return IRQ_HANDLED;
+	}
 
 	add_to_net_schedule_list_tail(netif);
 	maybe_schedule_tx_action(netif);
@@ -1985,9 +2007,20 @@ static int __init netback_init(void)
 		max_tx_slots = XEN_NETIF_NR_SLOTS_MIN;
 	}
 
-	xen_netbk = vzalloc(netbk_nr_groups * sizeof(*xen_netbk));
+	group = netbk_nr_groups;
+	if (!netbk_nr_groups)
+		netbk_nr_groups = (num_online_cpus() + 1) / 2;
+	if (netbk_nr_groups > MAX_GROUPS)
+		netbk_nr_groups = MAX_GROUPS;
+
+	do {
+		xen_netbk = vzalloc(netbk_nr_groups * sizeof(*xen_netbk));
+	} while (!xen_netbk && (netbk_nr_groups >>= 1));
 	if (!xen_netbk)
 		return -ENOMEM;
+	if (group && netbk_nr_groups != group)
+		pr_warn("netback: only using %u (instead of %u) groups\n",
+			netbk_nr_groups, group);
 
 	/* We can increase reservation by this much in net_rx_action(). */
 	balloon_update_driver_allowance(netbk_nr_groups * NET_RX_RING_SIZE);
