[Devel,rh7,v2,1/3] fs/cleancache: fix data invalidation in the cleancache during direct_io

Submitted by Andrey Ryabinin on April 12, 2017, 3:52 p.m.

Details

Message ID 20170412155233.1707-1-aryabinin@virtuozzo.com
State New
Series "fs/cleancache: fix data invalidation in the cleancache during direct_io"
Headers show

Commit Message

Andrey Ryabinin April 12, 2017, 3:52 p.m.
Currently some direct_io fs hooks call invalidate_inode_pages2_range()
conditionally iff mapping->nrpages is not zero. So if nrpages is zero,
data in cleancache wouldn't be invalidated. So the next buffered read
may get stale data from the cleancache.

Fix this by calling invalidate_inode_pages2_range() regardless of nrpages
value. And if nrpages is zero, bail out from invalidate_inode_pages2_range()
only after cleancache_invalidate_inode(), so that we invalidate cleancache
but still avoid pointless page cache lookups.

https://jira.sw.ru/browse/PSBM-63908
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---
 fs/9p/vfs_file.c  |  4 ++--
 fs/nfs/direct.c   | 16 ++++++----------
 fs/nfs/inode.c    |  7 ++++---
 fs/xfs/xfs_file.c | 30 ++++++++++++++----------------
 mm/filemap.c      | 28 ++++++++++++----------------
 mm/truncate.c     |  4 ++++
 6 files changed, 42 insertions(+), 47 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 7da03f8..afe0036 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -482,7 +482,7 @@  v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
 	if (invalidate && (total > 0)) {
 		pg_start = origin >> PAGE_CACHE_SHIFT;
 		pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT;
-		if (inode->i_mapping && inode->i_mapping->nrpages)
+		if (inode->i_mapping)
 			invalidate_inode_pages2_range(inode->i_mapping,
 						      pg_start, pg_end);
 		*offset += total;
@@ -688,7 +688,7 @@  v9fs_direct_write(struct file *filp, const char __user * data,
 	 * about to write.  We do this *before* the write so that if we fail
 	 * here we fall back to buffered write
 	 */
-	if (mapping->nrpages) {
+	{
 		pgoff_t pg_start = offset >> PAGE_CACHE_SHIFT;
 		pgoff_t pg_end   = (offset + count - 1) >> PAGE_CACHE_SHIFT;
 
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index ab96f01..9bbbb63 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -1132,12 +1132,10 @@  ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	if (result)
 		goto out_unlock;
 
-	if (mapping->nrpages) {
-		result = invalidate_inode_pages2_range(mapping,
-					pos >> PAGE_CACHE_SHIFT, end);
-		if (result)
-			goto out_unlock;
-	}
+	result = invalidate_inode_pages2_range(mapping,
+				pos >> PAGE_CACHE_SHIFT, end);
+	if (result)
+		goto out_unlock;
 
 	task_io_account_write(count);
 
@@ -1161,10 +1159,8 @@  ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 
 	result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
 
-	if (mapping->nrpages) {
-		invalidate_inode_pages2_range(mapping,
-					      pos >> PAGE_CACHE_SHIFT, end);
-	}
+	invalidate_inode_pages2_range(mapping,
+				pos >> PAGE_CACHE_SHIFT, end);
 
 	mutex_unlock(&inode->i_mutex);
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 8c06aed..779b05c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1065,10 +1065,11 @@  static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
 			if (ret < 0)
 				return ret;
 		}
-		ret = invalidate_inode_pages2(mapping);
-		if (ret < 0)
-			return ret;
 	}
+	ret = invalidate_inode_pages2(mapping);
+	if (ret < 0)
+		return ret;
+
 	if (S_ISDIR(inode->i_mode)) {
 		spin_lock(&inode->i_lock);
 		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 9a2193b..0b7a35b 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -346,7 +346,7 @@  xfs_file_aio_read(
 	 * serialisation.
 	 */
 	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-	if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
+	if ((ioflags & XFS_IO_ISDIRECT)) {
 		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
 
@@ -361,22 +361,20 @@  xfs_file_aio_read(
 		 * flush and reduce the chances of repeated iolock cycles going
 		 * forward.
 		 */
-		if (inode->i_mapping->nrpages) {
-			ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
-			if (ret) {
-				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
-				return ret;
-			}
-
-			/*
-			 * Invalidate whole pages. This can return an error if
-			 * we fail to invalidate a page, but this should never
-			 * happen on XFS. Warn if it does fail.
-			 */
-			ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
-			WARN_ON_ONCE(ret);
-			ret = 0;
+		ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+		if (ret) {
+			xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
+			return ret;
 		}
+
+		/*
+		 * Invalidate whole pages. This can return an error if
+		 * we fail to invalidate a page, but this should never
+		 * happen on XFS. Warn if it does fail.
+		 */
+		ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
+		WARN_ON_ONCE(ret);
+		ret = 0;
 		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
 	}
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 16517c6..8e608fc 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2598,18 +2598,16 @@  generic_file_direct_write_iter(struct kiocb *iocb, struct iov_iter *iter,
 	 * about to write.  We do this *before* the write so that we can return
 	 * without clobbering -EIOCBQUEUED from ->direct_IO().
 	 */
-	if (mapping->nrpages) {
-		written = invalidate_inode_pages2_range(mapping,
-					pos >> PAGE_CACHE_SHIFT, end);
-		/*
-		 * If a page can not be invalidated, return 0 to fall back
-		 * to buffered write.
-		 */
-		if (written) {
-			if (written == -EBUSY)
-				return 0;
-			goto out;
-		}
+	written = invalidate_inode_pages2_range(mapping,
+						pos >> PAGE_CACHE_SHIFT, end);
+	/*
+	 * If a page can not be invalidated, return 0 to fall back
+	 * to buffered write.
+	 */
+	if (written) {
+		if (written == -EBUSY)
+			return 0;
+		goto out;
 	}
 
 	written = mapping_direct_IO(mapping, WRITE, iocb, iter, pos);
@@ -2622,10 +2620,8 @@  generic_file_direct_write_iter(struct kiocb *iocb, struct iov_iter *iter,
 	 * so we don't support it 100%.  If this invalidation
 	 * fails, tough, the write still worked...
 	 */
-	if (mapping->nrpages) {
-		invalidate_inode_pages2_range(mapping,
-					      pos >> PAGE_CACHE_SHIFT, end);
-	}
+	invalidate_inode_pages2_range(mapping,
+				pos >> PAGE_CACHE_SHIFT, end);
 
 	if (written > 0) {
 		pos += written;
diff --git a/mm/truncate.c b/mm/truncate.c
index f460e67..ce4b1d8 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -628,6 +628,10 @@  int invalidate_inode_pages2_range(struct address_space *mapping,
 	int did_range_unmap = 0;
 
 	cleancache_invalidate_inode(mapping);
+
+	if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
+		return 0;
+
 	pagevec_init(&pvec, 0);
 	index = start;
 	while (index <= end && __pagevec_lookup(&pvec, mapping, index,

Comments

Dmitry Monakhov April 13, 2017, 6:51 a.m.
Andrey Ryabinin <aryabinin@virtuozzo.com> writes:

> Currently some direct_io fs hooks call invalidate_inode_pages2_range()
> conditionally iff mapping->nrpages is not zero. So if nrpages is zero,
> data in cleancache wouldn't be invalidated. So the next buffered read
> may get stale data from the cleancache.

>
> Fix this by calling invalidate_inode_pages2_range() regardless of nrpages
> value. And if nrpages is zero, bail out from invalidate_inode_pages2_range()
> only after cleancache_invalidate_inode(), so that we invalidate cleancache
> but still avoid pointless page cache lookups.
BTW, can we please make tcache plugable. So one who do not want fancy
caching features can simply disable it. As we do with pfcache.

>
> https://jira.sw.ru/browse/PSBM-63908
> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> ---
>  fs/9p/vfs_file.c  |  4 ++--
>  fs/nfs/direct.c   | 16 ++++++----------
>  fs/nfs/inode.c    |  7 ++++---
>  fs/xfs/xfs_file.c | 30 ++++++++++++++----------------
>  mm/filemap.c      | 28 ++++++++++++----------------
>  mm/truncate.c     |  4 ++++
>  6 files changed, 42 insertions(+), 47 deletions(-)
>
> diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
> index 7da03f8..afe0036 100644
> --- a/fs/9p/vfs_file.c
> +++ b/fs/9p/vfs_file.c
> @@ -482,7 +482,7 @@ v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
>  	if (invalidate && (total > 0)) {
>  		pg_start = origin >> PAGE_CACHE_SHIFT;
>  		pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT;
> -		if (inode->i_mapping && inode->i_mapping->nrpages)
> +		if (inode->i_mapping)
>  			invalidate_inode_pages2_range(inode->i_mapping,
>  						      pg_start, pg_end);
>  		*offset += total;
> @@ -688,7 +688,7 @@ v9fs_direct_write(struct file *filp, const char __user * data,
>  	 * about to write.  We do this *before* the write so that if we fail
>  	 * here we fall back to buffered write
>  	 */
> -	if (mapping->nrpages) {
> +	{
>  		pgoff_t pg_start = offset >> PAGE_CACHE_SHIFT;
>  		pgoff_t pg_end   = (offset + count - 1) >> PAGE_CACHE_SHIFT;
>  
> diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
> index ab96f01..9bbbb63 100644
> --- a/fs/nfs/direct.c
> +++ b/fs/nfs/direct.c
> @@ -1132,12 +1132,10 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
>  	if (result)
>  		goto out_unlock;
>  
> -	if (mapping->nrpages) {
> -		result = invalidate_inode_pages2_range(mapping,
> -					pos >> PAGE_CACHE_SHIFT, end);
> -		if (result)
> -			goto out_unlock;
> -	}
> +	result = invalidate_inode_pages2_range(mapping,
> +				pos >> PAGE_CACHE_SHIFT, end);
> +	if (result)
> +		goto out_unlock;
>  
>  	task_io_account_write(count);
>  
> @@ -1161,10 +1159,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
>  
>  	result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
>  
> -	if (mapping->nrpages) {
> -		invalidate_inode_pages2_range(mapping,
> -					      pos >> PAGE_CACHE_SHIFT, end);
> -	}
> +	invalidate_inode_pages2_range(mapping,
> +				pos >> PAGE_CACHE_SHIFT, end);
>  
>  	mutex_unlock(&inode->i_mutex);
>  
> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
> index 8c06aed..779b05c 100644
> --- a/fs/nfs/inode.c
> +++ b/fs/nfs/inode.c
> @@ -1065,10 +1065,11 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
>  			if (ret < 0)
>  				return ret;
>  		}
> -		ret = invalidate_inode_pages2(mapping);
> -		if (ret < 0)
> -			return ret;
>  	}
> +	ret = invalidate_inode_pages2(mapping);
> +	if (ret < 0)
> +		return ret;
> +
>  	if (S_ISDIR(inode->i_mode)) {
>  		spin_lock(&inode->i_lock);
>  		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 9a2193b..0b7a35b 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -346,7 +346,7 @@ xfs_file_aio_read(
>  	 * serialisation.
>  	 */
>  	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
> -	if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
> +	if ((ioflags & XFS_IO_ISDIRECT)) {
>  		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
>  		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
>  
> @@ -361,22 +361,20 @@ xfs_file_aio_read(
>  		 * flush and reduce the chances of repeated iolock cycles going
>  		 * forward.
>  		 */
> -		if (inode->i_mapping->nrpages) {
> -			ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
> -			if (ret) {
> -				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
> -				return ret;
> -			}
> -
> -			/*
> -			 * Invalidate whole pages. This can return an error if
> -			 * we fail to invalidate a page, but this should never
> -			 * happen on XFS. Warn if it does fail.
> -			 */
> -			ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
> -			WARN_ON_ONCE(ret);
> -			ret = 0;
> +		ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
> +		if (ret) {
> +			xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
> +			return ret;
>  		}
> +
> +		/*
> +		 * Invalidate whole pages. This can return an error if
> +		 * we fail to invalidate a page, but this should never
> +		 * happen on XFS. Warn if it does fail.
> +		 */
> +		ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
> +		WARN_ON_ONCE(ret);
> +		ret = 0;
>  		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
>  	}
>  
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 16517c6..8e608fc 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2598,18 +2598,16 @@ generic_file_direct_write_iter(struct kiocb *iocb, struct iov_iter *iter,
>  	 * about to write.  We do this *before* the write so that we can return
>  	 * without clobbering -EIOCBQUEUED from ->direct_IO().
>  	 */
> -	if (mapping->nrpages) {
> -		written = invalidate_inode_pages2_range(mapping,
> -					pos >> PAGE_CACHE_SHIFT, end);
> -		/*
> -		 * If a page can not be invalidated, return 0 to fall back
> -		 * to buffered write.
> -		 */
> -		if (written) {
> -			if (written == -EBUSY)
> -				return 0;
> -			goto out;
> -		}
> +	written = invalidate_inode_pages2_range(mapping,
> +						pos >> PAGE_CACHE_SHIFT, end);
> +	/*
> +	 * If a page can not be invalidated, return 0 to fall back
> +	 * to buffered write.
> +	 */
> +	if (written) {
> +		if (written == -EBUSY)
> +			return 0;
> +		goto out;
>  	}
>  
>  	written = mapping_direct_IO(mapping, WRITE, iocb, iter, pos);
> @@ -2622,10 +2620,8 @@ generic_file_direct_write_iter(struct kiocb *iocb, struct iov_iter *iter,
>  	 * so we don't support it 100%.  If this invalidation
>  	 * fails, tough, the write still worked...
>  	 */
> -	if (mapping->nrpages) {
> -		invalidate_inode_pages2_range(mapping,
> -					      pos >> PAGE_CACHE_SHIFT, end);
> -	}
> +	invalidate_inode_pages2_range(mapping,
> +				pos >> PAGE_CACHE_SHIFT, end);
>  
>  	if (written > 0) {
>  		pos += written;
> diff --git a/mm/truncate.c b/mm/truncate.c
> index f460e67..ce4b1d8 100644
> --- a/mm/truncate.c
> +++ b/mm/truncate.c
> @@ -628,6 +628,10 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
>  	int did_range_unmap = 0;
>  
>  	cleancache_invalidate_inode(mapping);
> +
> +	if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
> +		return 0;
> +
>  	pagevec_init(&pvec, 0);
>  	index = start;
>  	while (index <= end && __pagevec_lookup(&pvec, mapping, index,
> -- 
> 2.10.2
Konstantin Khorenko April 13, 2017, 7:39 a.m.
--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 04/13/2017 09:51 AM, Dmitry Monakhov wrote:
> Andrey Ryabinin <aryabinin@virtuozzo.com> writes:
>
>> Currently some direct_io fs hooks call invalidate_inode_pages2_range()
>> conditionally iff mapping->nrpages is not zero. So if nrpages is zero,
>> data in cleancache wouldn't be invalidated. So the next buffered read
>> may get stale data from the cleancache.
>
>>
>> Fix this by calling invalidate_inode_pages2_range() regardless of nrpages
>> value. And if nrpages is zero, bail out from invalidate_inode_pages2_range()
>> only after cleancache_invalidate_inode(), so that we invalidate cleancache
>> but still avoid pointless page cache lookups.
> BTW, can we please make tcache plugable. So one who do not want fancy
> caching features can simply disable it. As we do with pfcache.

tcache/tswap disable/enable tweak (vz7: patch "Subject: [PATCH rh7] tcache/tswap: enable by default")
tcache/swap can be disabled/enabled using following commands:
echo 'N' > /sys/module/{tcache,tswap}/parameters/active
echo 'Y' > /sys/module/{tcache,tswap}/parameters/active

per-cgroup on the fly: echo 1 > @memory.disable_cleancache

To disable tcache at boot time: "tcache.enabled=0" kernel option

>
>>
>> https://jira.sw.ru/browse/PSBM-63908
>> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
>> ---
>>  fs/9p/vfs_file.c  |  4 ++--
>>  fs/nfs/direct.c   | 16 ++++++----------
>>  fs/nfs/inode.c    |  7 ++++---
>>  fs/xfs/xfs_file.c | 30 ++++++++++++++----------------
>>  mm/filemap.c      | 28 ++++++++++++----------------
>>  mm/truncate.c     |  4 ++++
>>  6 files changed, 42 insertions(+), 47 deletions(-)
>>
>> diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
>> index 7da03f8..afe0036 100644
>> --- a/fs/9p/vfs_file.c
>> +++ b/fs/9p/vfs_file.c
>> @@ -482,7 +482,7 @@ v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
>>  	if (invalidate && (total > 0)) {
>>  		pg_start = origin >> PAGE_CACHE_SHIFT;
>>  		pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT;
>> -		if (inode->i_mapping && inode->i_mapping->nrpages)
>> +		if (inode->i_mapping)
>>  			invalidate_inode_pages2_range(inode->i_mapping,
>>  						      pg_start, pg_end);
>>  		*offset += total;
>> @@ -688,7 +688,7 @@ v9fs_direct_write(struct file *filp, const char __user * data,
>>  	 * about to write.  We do this *before* the write so that if we fail
>>  	 * here we fall back to buffered write
>>  	 */
>> -	if (mapping->nrpages) {
>> +	{
>>  		pgoff_t pg_start = offset >> PAGE_CACHE_SHIFT;
>>  		pgoff_t pg_end   = (offset + count - 1) >> PAGE_CACHE_SHIFT;
>>
>> diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
>> index ab96f01..9bbbb63 100644
>> --- a/fs/nfs/direct.c
>> +++ b/fs/nfs/direct.c
>> @@ -1132,12 +1132,10 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
>>  	if (result)
>>  		goto out_unlock;
>>
>> -	if (mapping->nrpages) {
>> -		result = invalidate_inode_pages2_range(mapping,
>> -					pos >> PAGE_CACHE_SHIFT, end);
>> -		if (result)
>> -			goto out_unlock;
>> -	}
>> +	result = invalidate_inode_pages2_range(mapping,
>> +				pos >> PAGE_CACHE_SHIFT, end);
>> +	if (result)
>> +		goto out_unlock;
>>
>>  	task_io_account_write(count);
>>
>> @@ -1161,10 +1159,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
>>
>>  	result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
>>
>> -	if (mapping->nrpages) {
>> -		invalidate_inode_pages2_range(mapping,
>> -					      pos >> PAGE_CACHE_SHIFT, end);
>> -	}
>> +	invalidate_inode_pages2_range(mapping,
>> +				pos >> PAGE_CACHE_SHIFT, end);
>>
>>  	mutex_unlock(&inode->i_mutex);
>>
>> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
>> index 8c06aed..779b05c 100644
>> --- a/fs/nfs/inode.c
>> +++ b/fs/nfs/inode.c
>> @@ -1065,10 +1065,11 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
>>  			if (ret < 0)
>>  				return ret;
>>  		}
>> -		ret = invalidate_inode_pages2(mapping);
>> -		if (ret < 0)
>> -			return ret;
>>  	}
>> +	ret = invalidate_inode_pages2(mapping);
>> +	if (ret < 0)
>> +		return ret;
>> +
>>  	if (S_ISDIR(inode->i_mode)) {
>>  		spin_lock(&inode->i_lock);
>>  		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
>> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
>> index 9a2193b..0b7a35b 100644
>> --- a/fs/xfs/xfs_file.c
>> +++ b/fs/xfs/xfs_file.c
>> @@ -346,7 +346,7 @@ xfs_file_aio_read(
>>  	 * serialisation.
>>  	 */
>>  	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
>> -	if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
>> +	if ((ioflags & XFS_IO_ISDIRECT)) {
>>  		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
>>  		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
>>
>> @@ -361,22 +361,20 @@ xfs_file_aio_read(
>>  		 * flush and reduce the chances of repeated iolock cycles going
>>  		 * forward.
>>  		 */
>> -		if (inode->i_mapping->nrpages) {
>> -			ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
>> -			if (ret) {
>> -				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
>> -				return ret;
>> -			}
>> -
>> -			/*
>> -			 * Invalidate whole pages. This can return an error if
>> -			 * we fail to invalidate a page, but this should never
>> -			 * happen on XFS. Warn if it does fail.
>> -			 */
>> -			ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
>> -			WARN_ON_ONCE(ret);
>> -			ret = 0;
>> +		ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
>> +		if (ret) {
>> +			xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
>> +			return ret;
>>  		}
>> +
>> +		/*
>> +		 * Invalidate whole pages. This can return an error if
>> +		 * we fail to invalidate a page, but this should never
>> +		 * happen on XFS. Warn if it does fail.
>> +		 */
>> +		ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
>> +		WARN_ON_ONCE(ret);
>> +		ret = 0;
>>  		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
>>  	}
>>
>> diff --git a/mm/filemap.c b/mm/filemap.c
>> index 16517c6..8e608fc 100644
>> --- a/mm/filemap.c
>> +++ b/mm/filemap.c
>> @@ -2598,18 +2598,16 @@ generic_file_direct_write_iter(struct kiocb *iocb, struct iov_iter *iter,
>>  	 * about to write.  We do this *before* the write so that we can return
>>  	 * without clobbering -EIOCBQUEUED from ->direct_IO().
>>  	 */
>> -	if (mapping->nrpages) {
>> -		written = invalidate_inode_pages2_range(mapping,
>> -					pos >> PAGE_CACHE_SHIFT, end);
>> -		/*
>> -		 * If a page can not be invalidated, return 0 to fall back
>> -		 * to buffered write.
>> -		 */
>> -		if (written) {
>> -			if (written == -EBUSY)
>> -				return 0;
>> -			goto out;
>> -		}
>> +	written = invalidate_inode_pages2_range(mapping,
>> +						pos >> PAGE_CACHE_SHIFT, end);
>> +	/*
>> +	 * If a page can not be invalidated, return 0 to fall back
>> +	 * to buffered write.
>> +	 */
>> +	if (written) {
>> +		if (written == -EBUSY)
>> +			return 0;
>> +		goto out;
>>  	}
>>
>>  	written = mapping_direct_IO(mapping, WRITE, iocb, iter, pos);
>> @@ -2622,10 +2620,8 @@ generic_file_direct_write_iter(struct kiocb *iocb, struct iov_iter *iter,
>>  	 * so we don't support it 100%.  If this invalidation
>>  	 * fails, tough, the write still worked...
>>  	 */
>> -	if (mapping->nrpages) {
>> -		invalidate_inode_pages2_range(mapping,
>> -					      pos >> PAGE_CACHE_SHIFT, end);
>> -	}
>> +	invalidate_inode_pages2_range(mapping,
>> +				pos >> PAGE_CACHE_SHIFT, end);
>>
>>  	if (written > 0) {
>>  		pos += written;
>> diff --git a/mm/truncate.c b/mm/truncate.c
>> index f460e67..ce4b1d8 100644
>> --- a/mm/truncate.c
>> +++ b/mm/truncate.c
>> @@ -628,6 +628,10 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
>>  	int did_range_unmap = 0;
>>
>>  	cleancache_invalidate_inode(mapping);
>> +
>> +	if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
>> +		return 0;
>> +
>>  	pagevec_init(&pvec, 0);
>>  	index = start;
>>  	while (index <= end && __pagevec_lookup(&pvec, mapping, index,
>> --
>> 2.10.2
> .
>