@@ -1196,6 +1196,7 @@ struct fuse_fill_data {
struct file *file;
struct inode *inode;
unsigned nr_pages;
+ struct page **orig_pages;
};
static int fuse_readpages_fill(void *_data, struct page *page)
@@ -2135,6 +2136,7 @@ static int fuse_send_writepages(struct fuse_fill_data *data)
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_file *ff;
loff_t off = -1;
+ int num_pages = req->num_pages;
/* we can acquire ff here because we do have locked pages here! */
ff = fuse_write_file(fc, fi);
@@ -2180,7 +2182,7 @@ static int fuse_send_writepages(struct fuse_fill_data *data)
if (i == 0)
off = page_offset(page);
- end_page_writeback(page);
+ data->orig_pages[i] = page;
}
if (!all_ok) {
@@ -2192,6 +2194,7 @@ static int fuse_send_writepages(struct fuse_fill_data *data)
__free_page(page);
req->pages[i] = NULL;
}
+ end_page_writeback(data->orig_pages[i]);
}
spin_lock(&fc->lock);
@@ -2218,6 +2221,9 @@ static int fuse_send_writepages(struct fuse_fill_data *data)
fuse_flush_writepages(data->inode);
spin_unlock(&fc->lock);
+ for (i = 0; i < num_pages; i++)
+ end_page_writeback(data->orig_pages[i]);
+
fuse_release_ff(inode, ff);
return 0;
}
@@ -2371,11 +2377,17 @@ static int fuse_writepages(struct address_space *mapping,
if (ff)
fuse_release_ff(inode, ff);
+ data.orig_pages = kcalloc(FUSE_MAX_PAGES_PER_REQ,
+ sizeof(struct page *),
+ GFP_NOFS);
+ if (!data.orig_pages)
+ goto out;
+
data.inode = inode;
data.req = fuse_request_alloc_nofs(fc, FUSE_MAX_PAGES_PER_REQ);
err = -ENOMEM;
if (!data.req)
- goto out;
+ goto out_and_free;
err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
if (data.req) {
@@ -2386,6 +2398,8 @@ static int fuse_writepages(struct address_space *mapping,
} else
fuse_put_request(fc, data.req);
}
+out_and_free:
+ kfree(data.orig_pages);
out:
return err;
}
Some operations (flush, fsync, fallocate, fiemap) want to flush dirty pages to fused before doing something. So they perform to steps: 1) filemap_write_and_wait 2) fuse_sync_writes The first of them must ensure that all dirty pages went to fuse-writeback and the second supposedly flushes pages under fuse-writeback to fused. This works in most of cases but there is a race: 1. Doing routine writeback, the fluser calls fuse_writepages. The latter eventually calls fuse_send_writepages. The latter copies original pages to temporary ones, then calls end_page_writeback for each original page. 2. Some operation (flush, fsync, fallocate, fiemap) performs 1) and 2): filemap_write_and_wait does nothing becase on previous step we called end_page_writeback for each page, and fuse_sync_writes does nothing as well because fi->queued_writes is still empty (fuse_send_writepages has not called fuse_flush_writepages yet). The operation immediately proceeds and completes. 3. fuse_send_writepages from 1. above calls fuse_flush_writepages that starts actual flushing dirty pages to fused. The result is disastrous: any cached write may bypass such operations (flush, fsync, fallocate, fiemap). The patch fixes the problem in a straightforward way: postponing end_page_writeback until fuse_flush_writepages is really called (mainline fuse does the same). https://jira.sw.ru/browse/PSBM-68454 Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com> --- fs/fuse/file.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-)