[RHEL7,COMMIT] ms/bcache: Data corruption fix #PSBM-106785

Submitted by Vasily Averin on Sept. 15, 2020, 8:55 a.m.

Details

Message ID 202009150855.08F8t5EA012698@vz7build.vvs.sw.ru
State New
Series "Series without cover letter"
Headers show

Commit Message

Vasily Averin Sept. 15, 2020, 8:55 a.m.
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.18.2.vz7.163.21
------>
commit 0cf06d79c7f1e7cefd8de081c412e966e58d4b83
Author: Kent Overstreet <kmo@daterainc.com>
Date:   Tue Sep 15 11:55:05 2020 +0300

    ms/bcache: Data corruption fix #PSBM-106785
    
    The code that handles overlapping extents that we've just read back in from disk
    was depending on the behaviour of the code that handles overlapping extents as
    we're inserting into a btree node in the case of an insert that forced an
    existing extent to be split: on insert, if we had to split we'd also insert a
    new extent to represent the top part of the old extent - and then that new
    extent would get written out.
    
    The code that read the extents back in thus not bother with splitting extents -
    if it saw an extent that ovelapped in the middle of an older extent, it would
    trim the old extent to only represent the bottom part, assuming that the
    original insert would've inserted a new extent to represent the top part.
    
    I still haven't figured out _how_ it can happen, but I'm now pretty convinced
    (and testing has confirmed) that there's some kind of an obscure corner case
    (probably involving extent merging, and multiple overwrites in different sets)
    that breaks this. The fix is to change the mergesort fixup code to split extents
    itself when required.
    
    Signed-off-by: Kent Overstreet <kmo@daterainc.com>
    Cc: linux-stable <stable@vger.kernel.org> # >= v3.10
    
    https://jira.sw.ru/browse/PSBM-106785
    (cherry picked from commit ef71ec00002d92a08eb27e9d036e3d48835b6597)
    Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---
 drivers/md/bcache/bset.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 14032e8..1b27cbd 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -927,7 +927,7 @@  static void sort_key_next(struct btree_iter *iter,
 		*i = iter->data[--iter->used];
 }
 
-static void btree_sort_fixup(struct btree_iter *iter)
+static struct bkey *btree_sort_fixup(struct btree_iter *iter, struct bkey *tmp)
 {
 	while (iter->used > 1) {
 		struct btree_iter_set *top = iter->data, *i = top + 1;
@@ -955,9 +955,22 @@  static void btree_sort_fixup(struct btree_iter *iter)
 		} else {
 			/* can't happen because of comparison func */
 			BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
-			bch_cut_back(&START_KEY(i->k), top->k);
+
+			if (bkey_cmp(i->k, top->k) < 0) {
+				bkey_copy(tmp, top->k);
+
+				bch_cut_back(&START_KEY(i->k), tmp);
+				bch_cut_front(i->k, top->k);
+				heap_sift(iter, 0, btree_iter_cmp);
+
+				return tmp;
+			} else {
+				bch_cut_back(&START_KEY(i->k), top->k);
+			}
 		}
 	}
+
+	return NULL;
 }
 
 static void btree_mergesort(struct btree *b, struct bset *out,
@@ -965,15 +978,20 @@  static void btree_mergesort(struct btree *b, struct bset *out,
 			    bool fixup, bool remove_stale)
 {
 	struct bkey *k, *last = NULL;
+	BKEY_PADDED(k) tmp;
 	bool (*bad)(struct btree *, const struct bkey *) = remove_stale
 		? bch_ptr_bad
 		: bch_ptr_invalid;
 
 	while (!btree_iter_end(iter)) {
 		if (fixup && !b->level)
-			btree_sort_fixup(iter);
+			k = btree_sort_fixup(iter, &tmp.k);
+		else
+			k = NULL;
+
+		if (!k)
+			k = bch_btree_iter_next(iter);
 
-		k = bch_btree_iter_next(iter);
 		if (bad(b, k))
 			continue;