From aa6bf01d391935a8929333bc2e243084ea0c58db Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 29 Feb 2012 09:53:48 +0000
Subject: xfs: use per-filesystem I/O completion workqueues
Git-commit: aa6bf01d391935a8929333bc2e243084ea0c58db
Patch-mainline: v3.4-rc1
References: bnc#846036

The new concurrency managed workqueues are cheap enough that we can create
per-filesystem instead of global workqueues.  This allows us to remove the
trylock or defer scheme on the ilock, which is not helpful once we have
outstanding log reservations until finishing a size update.

Also allow the default concurrency on these workqueues so that I/O completions
blocking on the ilock for one inode do not block processing for another inode.

Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>
Acked-by: Jeff Mahoney <jeffm@suse.com>
---
 fs/xfs/linux-2.6/xfs_aops.c  |   44 ++++++++++-------------------------
 fs/xfs/linux-2.6/xfs_aops.h  |    2 -
 fs/xfs/linux-2.6/xfs_buf.c   |   21 +----------------
 fs/xfs/linux-2.6/xfs_super.c |   53 ++++++++++++++++++++++++++++++++++++++++++-
 fs/xfs/xfs_mount.h           |    5 ++++
 5 files changed, 72 insertions(+), 53 deletions(-)

--- a/fs/xfs/linux-2.6/xfs_aops.c	2013-10-30 13:21:55.000000000 -0400
+++ b/fs/xfs/linux-2.6/xfs_aops.c	2013-10-30 13:21:56.000000000 -0400
@@ -170,21 +170,15 @@ static inline bool xfs_ioend_is_append(s
  * will be the intended file size until i_size is updated.  If this write does
  * not extend all the way to the valid file size then restrict this update to
  * the end of the write.
- *
- * This function does not block as blocking on the inode lock in IO completion
- * can lead to IO completion order dependency deadlocks.. If it can't get the
- * inode ilock it will return EAGAIN. Callers must handle this.
  */
-STATIC int
+STATIC void
 xfs_setfilesize(
-	xfs_ioend_t		*ioend)
+	struct xfs_ioend	*ioend)
 {
-	xfs_inode_t		*ip = XFS_I(ioend->io_inode);
+	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
 	xfs_fsize_t		isize;
 
-	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
-		return EAGAIN;
-
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	isize = xfs_ioend_new_eof(ioend);
 	if (isize) {
 		ip->i_d.di_size = isize;
@@ -192,7 +186,6 @@ xfs_setfilesize(
 	}
 
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	return 0;
 }
 
 /*
@@ -206,10 +199,12 @@ xfs_finish_ioend(
 	struct xfs_ioend	*ioend)
 {
 	if (atomic_dec_and_test(&ioend->io_remaining)) {
+		struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
+
 		if (ioend->io_type == IO_UNWRITTEN)
-			queue_work(xfsconvertd_workqueue, &ioend->io_work);
+			queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
 		else if (xfs_ioend_is_append(ioend))
-			queue_work(xfsdatad_workqueue, &ioend->io_work);
+			queue_work(mp->m_data_workqueue, &ioend->io_work);
 		else
 			xfs_destroy_ioend(ioend);
 	}
@@ -250,26 +245,13 @@ xfs_end_io(
 	 * We might have to update the on-disk file size after extending
 	 * writes.
 	 */
-	error = xfs_setfilesize(ioend);
-	ASSERT(!error || error == EAGAIN);
+	xfs_setfilesize(ioend);
 
 done:
-	/*
-	 * If we didn't complete processing of the ioend, requeue it to the
-	 * tail of the workqueue for another attempt later. Otherwise destroy
-	 * it.
-	 */
-	if (error == EAGAIN) {
-		atomic_inc(&ioend->io_remaining);
-		xfs_finish_ioend(ioend);
-		/* ensure we don't spin on blocked ioends */
-		delay(1);
-	} else {
-		if (ioend->io_iocb)
-			aio_complete(ioend->io_iocb, ioend->io_error ?
-					ioend->io_error : ioend->io_result, 0);
-		xfs_destroy_ioend(ioend);
-	}
+	if (ioend->io_iocb)
+		aio_complete(ioend->io_iocb, ioend->io_error ?
+				ioend->io_error : ioend->io_result, 0);
+	xfs_destroy_ioend(ioend);
 }
 
 /*
--- a/fs/xfs/linux-2.6/xfs_aops.h	2013-10-30 13:21:54.000000000 -0400
+++ b/fs/xfs/linux-2.6/xfs_aops.h	2013-10-30 13:21:56.000000000 -0400
@@ -18,8 +18,6 @@
 #ifndef __XFS_AOPS_H__
 #define __XFS_AOPS_H__
 
-extern struct workqueue_struct *xfsdatad_workqueue;
-extern struct workqueue_struct *xfsconvertd_workqueue;
 extern mempool_t *xfs_ioend_pool;
 
 /*
--- a/fs/xfs/linux-2.6/xfs_buf.c	2013-10-30 13:21:54.000000000 -0400
+++ b/fs/xfs/linux-2.6/xfs_buf.c	2013-10-30 13:25:06.000000000 -0400
@@ -46,8 +46,6 @@ STATIC int xfsbufd(void *);
 STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
 
 static struct workqueue_struct *xfslogd_workqueue;
-struct workqueue_struct *xfsdatad_workqueue;
-struct workqueue_struct *xfsconvertd_workqueue;
 
 #ifdef XFS_BUF_LOCK_TRACKING
 # define XB_SET_OWNER(bp)	((bp)->b_last_holder = current->pid)
@@ -1839,8 +1837,8 @@ xfs_flush_buftarg(
 	LIST_HEAD(wait_list);
 	struct blk_plug plug;
 
-	xfs_buf_runall_queues(xfsconvertd_workqueue);
-	xfs_buf_runall_queues(xfsdatad_workqueue);
+	xfs_buf_runall_queues(target->bt_mount->m_unwritten_workqueue);
+	xfs_buf_runall_queues(target->bt_mount->m_data_workqueue);
 	xfs_buf_runall_queues(xfslogd_workqueue);
 
 	set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
@@ -1893,21 +1891,8 @@ xfs_buf_init(void)
 	if (!xfslogd_workqueue)
 		goto out_free_buf_zone;
 
-	xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
-	if (!xfsdatad_workqueue)
-		goto out_destroy_xfslogd_workqueue;
-
-	xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
-						WQ_MEM_RECLAIM, 1);
-	if (!xfsconvertd_workqueue)
-		goto out_destroy_xfsdatad_workqueue;
-
 	return 0;
 
- out_destroy_xfsdatad_workqueue:
-	destroy_workqueue(xfsdatad_workqueue);
- out_destroy_xfslogd_workqueue:
-	destroy_workqueue(xfslogd_workqueue);
  out_free_buf_zone:
 	kmem_zone_destroy(xfs_buf_zone);
  out:
@@ -1917,8 +1902,6 @@ xfs_buf_init(void)
 void
 xfs_buf_terminate(void)
 {
-	destroy_workqueue(xfsconvertd_workqueue);
-	destroy_workqueue(xfsdatad_workqueue);
 	destroy_workqueue(xfslogd_workqueue);
 	kmem_zone_destroy(xfs_buf_zone);
 }
--- a/fs/xfs/linux-2.6/xfs_super.c	2013-10-30 13:21:54.000000000 -0400
+++ b/fs/xfs/linux-2.6/xfs_super.c	2013-10-30 13:41:28.000000000 -0400
@@ -64,6 +64,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/parser.h>
+#include <linux/workqueue.h>
 
 static const struct super_operations xfs_super_operations;
 static kmem_zone_t *xfs_ioend_zone;
@@ -823,6 +824,49 @@ xfs_flush_inodes(
 	}
 }
 
+STATIC int	/* 0 on success, -ENOMEM on any allocation failure */
+xfs_init_mount_workqueues(
+	struct xfs_mount	*mp)	/* receives both queues and names */
+{
+	char *wqname = kasprintf(GFP_KERNEL, "xfs-data/%s", mp->m_fsname);
+	if (!wqname)
+		return -ENOMEM;
+
+	mp->m_data_workqueue_name = wqname;	/* kept in mp, freed later */
+	mp->m_data_workqueue = alloc_workqueue(wqname, WQ_MEM_RECLAIM, 0);
+	if (!mp->m_data_workqueue)
+		goto out;
+
+	wqname = kasprintf(GFP_KERNEL, "xfs-conv/%s", mp->m_fsname);
+	if (!wqname)
+		goto out_destroy_data_iodone_queue;
+
+	mp->m_unwritten_workqueue_name = wqname; /* kept in mp, freed later */
+	mp->m_unwritten_workqueue = alloc_workqueue(wqname, WQ_MEM_RECLAIM, 0);
+	if (!mp->m_unwritten_workqueue)
+		goto out_free_data_conv_queue_name;
+
+	return 0;
+
+out_free_data_conv_queue_name:		/* unwritten queue alloc failed */
+	kfree(mp->m_unwritten_workqueue_name);
+out_destroy_data_iodone_queue:		/* unwritten name alloc failed */
+	destroy_workqueue(mp->m_data_workqueue);
+out:					/* data queue alloc failed */
+	kfree(mp->m_data_workqueue_name);
+	return -ENOMEM;	/* NOTE(review): confirm sign caller expects */
+}
+
+STATIC void	/* release what xfs_init_mount_workqueues() set up */
+xfs_destroy_mount_workqueues(
+	struct xfs_mount	*mp)	/* mount owning the queues */
+{
+	destroy_workqueue(mp->m_data_workqueue);	/* queue first ... */
+	kfree(mp->m_data_workqueue_name);		/* ... then its name */
+	destroy_workqueue(mp->m_unwritten_workqueue);	/* same order here */
+	kfree(mp->m_unwritten_workqueue_name);
+}
+
 /* Catch misguided souls that try to use this interface on XFS */
 STATIC struct inode *
 xfs_fs_alloc_inode(
@@ -1067,6 +1111,7 @@ xfs_fs_put_super(
 	xfs_unmountfs(mp);
 	xfs_freesb(mp);
 	xfs_icsb_destroy_counters(mp);
+	xfs_destroy_mount_workqueues(mp);
 	xfs_close_devices(mp);
 	xfs_dmops_put(mp);
 	xfs_free_fsname(mp);
@@ -1409,10 +1454,14 @@ xfs_fs_fill_super(
 	if (error)
 		goto out_put_dmops;
 
-	error = xfs_icsb_init_counters(mp);
+	error = xfs_init_mount_workqueues(mp);
 	if (error)
 		goto out_close_devices;
 
+	error = xfs_icsb_init_counters(mp);
+	if (error)
+		goto out_destroy_workqueues;
+
 	error = xfs_readsb(mp, flags);
 	if (error)
 		goto out_destroy_counters;
@@ -1482,6 +1531,8 @@ xfs_fs_fill_super(
 	xfs_freesb(mp);
  out_destroy_counters:
 	xfs_icsb_destroy_counters(mp);
+out_destroy_workqueues:
+	xfs_destroy_mount_workqueues(mp);
  out_close_devices:
 	xfs_close_devices(mp);
  out_put_dmops:
--- a/fs/xfs/xfs_mount.h	2013-10-30 13:21:54.000000000 -0400
+++ b/fs/xfs/xfs_mount.h	2013-10-30 13:37:44.000000000 -0400
@@ -214,6 +214,11 @@ typedef struct xfs_mount {
 	struct shrinker		m_inode_shrink;	/* inode reclaim shrinker */
 	int64_t			m_low_space[XFS_LOWSP_MAX];
 						/* low free space thresholds */
+
+	const char		*m_data_workqueue_name;
+	struct workqueue_struct	*m_data_workqueue;
+	const char		*m_unwritten_workqueue_name;
+	struct workqueue_struct	*m_unwritten_workqueue;
 	struct vfsmount         *m_vfsmount;	/* Unused */
 	spinlock_t		m_vfsmount_lock;	/* Unused */
 	const char		*m_mtpt;
