[RHEL7,COMMIT] bio_vec-backed iov_iter

Submitted by Konstantin Khorenko on May 25, 2020, 2:52 p.m.

Details

Message ID 202005251452.04PEqSAR004540@finist-ce7.sw.ru
State New
Series "fs, direct_IO: Switch to iov_iter and allow bio_vec for ext4"
Headers show

Commit Message

Konstantin Khorenko May 25, 2020, 2:52 p.m.
The commit is pushed to "branch-rh7-3.10.0-1127.8.2.vz7.161.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.8.2.vz7.161.1
------>
commit 43e0fc3ae1bbc64cf2a14056e6ecabdc323f02cb
Author: Al Viro <viro@zeniv.linux.org.uk>
Date:   Mon May 25 17:52:28 2020 +0300

    bio_vec-backed iov_iter
    
    ms commit 62a8067a7f35 (partial)
    
    New variant of iov_iter - ITER_BVEC in iter->type, backed with
    bio_vec array instead of iovec one.  Primitives taught to deal
    with such beasts, __swap_write() switched to using that kind
    of iov_iter.
    
    Note that bio_vec is just a <page, offset, length> triple - there's
    nothing block-specific about it.  I've left the definition where it
    was, but took it from under ifdef CONFIG_BLOCK.
    
    Next target: ->splice_write()...
    
    Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
    Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
    
    =====================
    Patchset description:
    [00/30] fs,direct_IO: Switch to iov_iter and allow bio_vec for ext4
    
    This patchset transforms direct_IO callbacks, blockdev_direct_IO
    and its underlying functions to iov_iter, and introduces complete
    support of iov_iter for ext4.
    
    The iov_iter subtypes supported for ext4 are iovec and bio_vec. The
    first is for traditional user-submitted aio, while bio_vec is the type
    that matters to us, since we use it in ploop.
    
    bio_vec operates with pages instead of user addresses (like iovec
    does), so it requires specific callbacks in do_blockdev_direct_IO()
    and in the functions it calls.
    
    The patchset reworks do_blockdev_direct_IO() in the same manner
    as in mainstream. Most of the remaining patches were prepared manually,
    since we have significant differences from ms (RHEL7 patches, our
    direct IO patches for FUSE; all of them have changed many functions).
    In the end, it became possible to enable the kaio engine (which
    results in direct_IO) for ext4.
    
    https://jira.sw.ru/browse/PSBM-99793
---
 mm/iov-iter.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 63 insertions(+), 4 deletions(-)

Patch hide | download patch | download mbox

diff --git a/mm/iov-iter.c b/mm/iov-iter.c
index 3a3a51cf3b598..04d92fb9bfcff 100644
--- a/mm/iov-iter.c
+++ b/mm/iov-iter.c
@@ -912,9 +912,26 @@  unsigned long iov_iter_alignment(struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_alignment);
 
-ssize_t iov_iter_get_pages(struct iov_iter *i,
-		   struct page **pages, size_t maxsize,
-		   size_t *start, int rw)
+static ssize_t get_pages_bvec(struct iov_iter *i,
+		struct page **pages, size_t maxsize,
+		size_t *start)
+{
+	struct bio_vec *bvec = iov_iter_bvec(i);
+	size_t len = bvec->bv_len - i->iov_offset;
+	if (len > i->count)
+		len = i->count;
+	if (len > maxsize)
+		len = maxsize;
+	*start = bvec->bv_offset + i->iov_offset;
+
+	get_page(*pages = bvec->bv_page);
+
+	return len;
+}
+
+static ssize_t get_pages_iovec(struct iov_iter *i,
+		struct page **pages, size_t maxsize,
+		size_t *start, int rw)
 {
 	size_t offset = i->iov_offset;
 	const struct iovec *iov = iov_iter_iovec(i);
@@ -937,9 +954,43 @@  ssize_t iov_iter_get_pages(struct iov_iter *i,
 		return res;
 	return (res == n ? len : res * PAGE_SIZE) - *start;
 }
+
+ssize_t iov_iter_get_pages(struct iov_iter *i,
+			   struct page **pages, size_t maxsize,
+			   size_t *start, int rw)
+{
+	if (iov_iter_has_bvec(i))
+		return get_pages_bvec(i, pages, maxsize, start);
+	else
+		return get_pages_iovec(i, pages, maxsize, start, rw);
+}
 EXPORT_SYMBOL(iov_iter_get_pages);
 
-int iov_iter_npages(const struct iov_iter *i, int maxpages)
+static int iov_iter_npages_bvec(struct iov_iter *i, int maxpages)
+{
+	size_t offset = i->iov_offset;
+	size_t size = i->count;
+	struct bio_vec *bvec = iov_iter_bvec(i);
+	int npages = 0;
+	int n;
+
+	for (n = 0; size && n < i->nr_segs; n++, bvec++) {
+		size_t len = bvec->bv_len - offset;
+		offset = 0;
+		if (unlikely(!len))	/* empty segment */
+			continue;
+		if (len > size)
+			len = size;
+		npages++;
+		if (npages >= maxpages)	/* don't bother going further */
+			return maxpages;
+		size -= len;
+		offset = 0;
+	}
+	return min(npages, maxpages);
+}
+
+static int iov_iter_npages_iovec(const struct iov_iter *i, int maxpages)
 {
 	size_t offset = i->iov_offset;
 	size_t size = i->count;
@@ -964,4 +1015,12 @@  int iov_iter_npages(const struct iov_iter *i, int maxpages)
 	}
 	return min(npages, maxpages);
 }
+
+int iov_iter_npages(struct iov_iter *i, int maxpages)
+{
+	if (iov_iter_has_bvec(i))
+		return iov_iter_npages_bvec(i, maxpages);
+	else
+		return iov_iter_npages_iovec(i, maxpages);
+}
 EXPORT_SYMBOL(iov_iter_npages);