[Devel,vz7] ploop: ploop_grow must nullify holes

Submitted by Maxim Patlasov on Nov. 24, 2016, 1:05 a.m.

Details

Message ID 147994934468.7179.13353605783352184485.stgit@maxim-thinkpad
State New
Series "ploop: ploop_grow must nullify holes"
Headers show

Commit Message

Maxim Patlasov Nov. 24, 2016, 1:05 a.m.
Before the patch, ploop_grow nullified only those image-blocks of the
future extended BAT that were referenced from the former BAT. That was
obviously wrong: such blocks might contain garbage that would be
interpreted as a cluster-to-image-block mapping after ploop_grow.

The patch splits ploop_grow into two parts: first, relocate all
referenced image-blocks of the future BAT to the end of the image file;
then, second, nullify the whole range of the future BAT.

https://jira.sw.ru/browse/PSBM-55685

Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
---
 drivers/block/ploop/dev.c     |  137 +++++++++++++++++++++++++++++++++++------
 drivers/block/ploop/events.h  |    1 
 drivers/block/ploop/io_kaio.c |    4 +
 drivers/block/ploop/map.c     |   31 ++-------
 include/linux/ploop/ploop.h   |    5 +
 5 files changed, 132 insertions(+), 46 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 921ec8b..26017eb 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -1359,7 +1359,8 @@  static void ploop_complete_request(struct ploop_request * preq)
 	WARN_ON(!preq->error && test_bit(PLOOP_REQ_ISSUE_FLUSH, &preq->state));
 
 	if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
-	    test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
+	    test_bit(PLOOP_REQ_RELOC_S, &preq->state) ||
+	    test_bit(PLOOP_REQ_RELOC_N, &preq->state)) {
 		if (preq->error)
 			set_bit(PLOOP_S_ABORT, &plo->state);
 
@@ -1403,8 +1404,11 @@  static void ploop_complete_request(struct ploop_request * preq)
 		int i;
 		struct bio * bio = preq->aux_bio;
 
-		for (i = 0; i < bio->bi_vcnt; i++)
-			put_page(bio->bi_io_vec[i].bv_page);
+		for (i = 0; i < bio->bi_vcnt; i++) {
+			struct page *page = bio->bi_io_vec[i].bv_page;
+			if (page != ZERO_PAGE(0))
+				put_page(page);
+		}
 
 		bio_put(bio);
 
@@ -1985,6 +1989,61 @@  ploop_entry_reloc_req(struct ploop_request *preq, iblock_t *iblk)
 		BUG();
 }
 
+static void fill_zero_bio(struct ploop_device *plo, struct bio * bio)
+{
+	int pages = block_vecs(plo);
+
+	for (; bio->bi_vcnt < pages; bio->bi_vcnt++) {
+		bio->bi_io_vec[bio->bi_vcnt].bv_page = ZERO_PAGE(0);
+		bio->bi_io_vec[bio->bi_vcnt].bv_offset = 0;
+		bio->bi_io_vec[bio->bi_vcnt].bv_len = PAGE_SIZE;
+	}
+	bio->bi_sector = 0;
+	bio->bi_size = (1 << (plo->cluster_log + 9));
+}
+
+/*
+ * Process a RELOC_N preq: write one cluster of zeroes at preq->iblock
+ * in the top delta, using a bio built from ZERO_PAGE(0) pages.
+ *
+ * The aux_bio is allocated lazily on the first call and reused on
+ * subsequent calls; completion code must not put_page() the shared
+ * zero page.  Returns 0 on success or -ENOMEM if bio_alloc() fails.
+ */
+static int
+ploop_entry_nullify_req(struct ploop_request *preq)
+{
+	struct ploop_device *plo       = preq->plo;
+	struct ploop_delta  *top_delta = ploop_top_delta(plo);
+	struct bio_list sbl;
+
+	if (!preq->aux_bio) {
+		preq->aux_bio = bio_alloc(GFP_NOFS, block_vecs(plo));
+		if (!preq->aux_bio)
+			return -ENOMEM;
+		fill_zero_bio(plo, preq->aux_bio);
+	}
+
+	sbl.head = sbl.tail = preq->aux_bio;
+	preq->eng_state = PLOOP_E_RELOC_NULLIFY;
+	list_del_init(&preq->list);
+
+	/*
+	 * One might expect the format driver to sync the nullified
+	 * blocks via an image fsync before updating the header.
+	 * But we write this data directly into the underlying device,
+	 * bypassing EXT4 through the extent map tree (see
+	 * dio_submit()), so an fsync of the EXT4 image doesn't help.
+	 * We must force a sync of the nullified blocks ourselves.
+	 */
+
+	preq->eng_io = &top_delta->io;
+	set_bit(PLOOP_REQ_ISSUE_FLUSH, &preq->state);
+	top_delta->io.ops->submit(&top_delta->io, preq, preq->req_rw,
+				  &sbl, preq->iblock, 1<<plo->cluster_log);
+	return 0;
+}
+
 static int discard_get_index(struct ploop_request *preq)
 {
 	struct ploop_device *plo       = preq->plo;
@@ -2077,6 +2136,7 @@  static inline bool preq_is_special(struct ploop_request * preq)
 	return state & (PLOOP_REQ_MERGE_FL |
 			PLOOP_REQ_RELOC_A_FL |
 			PLOOP_REQ_RELOC_S_FL |
+			PLOOP_REQ_RELOC_N_FL |
 			PLOOP_REQ_DISCARD_FL |
 			PLOOP_REQ_ZERO_FL);
 }
@@ -2164,6 +2224,11 @@  restart:
 		if (iblk)
 			ploop_reloc_sched_read(preq, iblk);
 		return;
+	} else if (test_bit(PLOOP_REQ_RELOC_N, &preq->state)) {
+		err = ploop_entry_nullify_req(preq);
+		if (err)
+			goto error;
+		return;
 	} else if (preq->req_cluster == ~0U) {
 		BUG_ON(!test_bit(PLOOP_REQ_MERGE, &preq->state));
 		BUG_ON(preq->trans_map);
@@ -2710,7 +2775,7 @@  restart:
 
 		del_lockout(preq);
 		preq->eng_state = PLOOP_E_ENTRY;
-		preq->req_cluster++;
+		preq->iblock++;
 		goto restart;
 	}
 	case PLOOP_E_TRANS_DELTA_READ:
@@ -2866,8 +2931,11 @@  static void ploop_handle_enospc_req(struct ploop_request *preq)
 		int i;
 		struct bio * bio = preq->aux_bio;
 
-		for (i = 0; i < bio->bi_vcnt; i++)
-			put_page(bio->bi_io_vec[i].bv_page);
+		for (i = 0; i < bio->bi_vcnt; i++) {
+			struct page *page = bio->bi_io_vec[i].bv_page;
+			if (page != ZERO_PAGE(0))
+				put_page(page);
+		}
 
 		bio_put(bio);
 
@@ -4131,6 +4199,7 @@  static int ploop_clear(struct ploop_device * plo, struct block_device * bdev)
 
 	clear_bit(PLOOP_S_DISCARD_LOADED, &plo->state);
 	clear_bit(PLOOP_S_DISCARD, &plo->state);
+	clear_bit(PLOOP_S_NULLIFY, &plo->state);
 
 	destroy_deltas(plo, &plo->map);
 
@@ -4190,15 +4259,29 @@  static int ploop_index_update_ioc(struct ploop_device *plo, unsigned long arg)
 	return 0;
 }
 
-static void ploop_relocate(struct ploop_device * plo)
+enum {
+	PLOOP_GROW_RELOC = 0,
+	PLOOP_GROW_NULLIFY,
+	PLOOP_GROW_MAX,
+};
+
+static void ploop_relocate(struct ploop_device * plo, int grow_stage)
 {
 	struct ploop_request * preq;
+	int reloc_type = (grow_stage == PLOOP_GROW_RELOC) ?
+		PLOOP_REQ_RELOC_A : PLOOP_REQ_RELOC_N;
+
+	BUG_ON(grow_stage != PLOOP_GROW_RELOC &&
+	       grow_stage != PLOOP_GROW_NULLIFY);
 
 	spin_lock_irq(&plo->lock);
 
 	atomic_set(&plo->maintenance_cnt, 1);
 	plo->grow_relocated = 0;
 
+	if (grow_stage == PLOOP_GROW_NULLIFY)
+		set_bit(PLOOP_S_NULLIFY, &plo->state);
+
 	init_completion(&plo->maintenance_comp);
 
 	preq = ploop_alloc_request(plo);
@@ -4208,10 +4291,10 @@  static void ploop_relocate(struct ploop_device * plo)
 	preq->req_size = 0;
 	preq->req_rw = WRITE_SYNC;
 	preq->eng_state = PLOOP_E_ENTRY;
-	preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_A);
+	preq->state = (1 << PLOOP_REQ_SYNC) | (1 << reloc_type);
 	preq->error = 0;
 	preq->tstamp = jiffies;
-	preq->iblock = 0;
+	preq->iblock = (reloc_type == PLOOP_REQ_RELOC_A) ? 0 : plo->grow_start;
 	preq->prealloc_size = 0;
 
 	atomic_inc(&plo->maintenance_cnt);
@@ -4235,12 +4318,16 @@  static int ploop_grow(struct ploop_device *plo, struct block_device *bdev,
 	struct ploop_delta *delta = ploop_top_delta(plo);
 	int reloc = 0; /* 'relocation needed' flag */
 	int err;
+	int grow_stage = PLOOP_GROW_RELOC;
 
 	if (!delta)
 		return -ENOENT;
 
-	if (plo->maintenance_type == PLOOP_MNTN_GROW)
+	if (plo->maintenance_type == PLOOP_MNTN_GROW) {
+		if (test_bit(PLOOP_S_NULLIFY, &plo->state))
+			grow_stage = PLOOP_GROW_NULLIFY;
 		goto already;
+	}
 
 	if (plo->maintenance_type != PLOOP_MNTN_OFF)
 		return -EBUSY;
@@ -4276,24 +4363,28 @@  static int ploop_grow(struct ploop_device *plo, struct block_device *bdev,
 	if (reloc) {
 		plo->maintenance_type = PLOOP_MNTN_GROW;
 		ploop_relax(plo);
-		ploop_relocate(plo);
+		for (; grow_stage < PLOOP_GROW_MAX; grow_stage++) {
+			ploop_relocate(plo, grow_stage);
 already:
-		err = ploop_maintenance_wait(plo);
-		if (err)
-			return err;
+			err = ploop_maintenance_wait(plo);
+			if (err)
+				return err;
 
-		BUG_ON(atomic_read(&plo->maintenance_cnt));
+			BUG_ON(atomic_read(&plo->maintenance_cnt));
 
-		if (plo->maintenance_type != PLOOP_MNTN_GROW)
-			return -EALREADY;
+			if (plo->maintenance_type != PLOOP_MNTN_GROW)
+				return -EALREADY;
 
-		if (test_bit(PLOOP_S_ABORT, &plo->state)) {
-			plo->maintenance_type = PLOOP_MNTN_OFF;
-			return -EIO;
+			if (test_bit(PLOOP_S_ABORT, &plo->state)) {
+				clear_bit(PLOOP_S_NULLIFY, &plo->state);
+				plo->maintenance_type = PLOOP_MNTN_OFF;
+				return -EIO;
+			}
 		}
 
 		ploop_quiesce(plo);
 		new_size = plo->grow_new_size;
+		clear_bit(PLOOP_S_NULLIFY, &plo->state);
 		plo->maintenance_type = PLOOP_MNTN_OFF;
 	}
 
@@ -5278,8 +5369,11 @@  static struct ploop_device *ploop_dev_init(int index)
 {
 	struct ploop_device *plo = ploop_dev_search(index);
 
-	if (plo)
+	if (plo) {
+		BUG_ON(list_empty(&plo->map.delta_list) &&
+		       test_bit(PLOOP_S_NULLIFY, &plo->state));
 		return plo;
+	}
 
 	plo = __ploop_dev_alloc(index);
 	if (plo) {
@@ -5387,6 +5481,7 @@  static int ploop_minor_open(struct inode *inode, struct file *file)
 		ploop_sysfs_init(plo);
 		ploop_dev_insert(plo);
 	}
+	BUG_ON(test_bit(PLOOP_S_NULLIFY, &plo->state));
 	set_bit(PLOOP_S_LOCKED, &plo->locking_state);
 	mutex_unlock(&ploop_devices_mutex);
 
diff --git a/drivers/block/ploop/events.h b/drivers/block/ploop/events.h
index ac0f343..62144e1 100644
--- a/drivers/block/ploop/events.h
+++ b/drivers/block/ploop/events.h
@@ -43,6 +43,7 @@ 
 			{ 1 << PLOOP_REQ_MERGE,		"M"},	\
 			{ 1 << PLOOP_REQ_RELOC_A,	"RA"},	\
 			{ 1 << PLOOP_REQ_RELOC_S,	"RS"},	\
+			{ 1 << PLOOP_REQ_RELOC_N,	"RN"},	\
 			{ 1 << PLOOP_REQ_ZERO,		"Z"},	\
 			{ 1 << PLOOP_REQ_DISCARD,	"D"})
 
diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
index 85863df..ee9ba26 100644
--- a/drivers/block/ploop/io_kaio.c
+++ b/drivers/block/ploop/io_kaio.c
@@ -70,7 +70,9 @@  static void kaio_complete_io_state(struct ploop_request * preq)
 	int post_fsync = 0;
 	int need_fua = !!(preq->req_rw & REQ_FUA);
 	unsigned long state = READ_ONCE(preq->state);
-	int reloc = !!(state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL));
+	int reloc = !!(state & (PLOOP_REQ_RELOC_A_FL|
+				PLOOP_REQ_RELOC_S_FL|
+				PLOOP_REQ_RELOC_N_FL));
 
 	if (preq->error || !(preq->req_rw & REQ_FUA) ||
 	    preq->eng_state == PLOOP_E_INDEX_READ ||
diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
index 715dc15..f21b9ab 100644
--- a/drivers/block/ploop/map.c
+++ b/drivers/block/ploop/map.c
@@ -1072,9 +1072,6 @@  static void map_wb_complete_post_process(struct ploop_map *map,
 					 struct ploop_request *preq, int err)
 {
 	struct ploop_device *plo       = map->plo;
-	struct ploop_delta  *top_delta = map_top_delta(map);
-	struct bio_list sbl;
-	int i;
 
 	if (likely(err ||
 		   (!test_bit(PLOOP_REQ_RELOC_A, &preq->state) &&
@@ -1098,26 +1095,14 @@  static void map_wb_complete_post_process(struct ploop_map *map,
 	BUG_ON (!test_bit(PLOOP_REQ_RELOC_A, &preq->state));
 	BUG_ON (!preq->aux_bio);
 
-	sbl.head = sbl.tail = preq->aux_bio;
-	preq->eng_state = PLOOP_E_RELOC_NULLIFY;
-	list_del_init(&preq->list);
-	for (i = 0; i < preq->aux_bio->bi_vcnt; i++)
-		memset(page_address(preq->aux_bio->bi_io_vec[i].bv_page),
-		       0, PAGE_SIZE);
-
-	/*
-	 * Lately we think we does sync of nullified blocks at format
-	 * driver by image fsync before header update.
-	 * But we write this data directly into underlying device
-	 * bypassing EXT4 by usage of extent map tree
-	 * (see dio_submit()). So fsync of EXT4 image doesnt help us.
-	 * We need to force sync of nullified blocks.
-	 */
-	preq->eng_io = &top_delta->io;
-	BUG_ON(test_bit(PLOOP_REQ_POST_SUBMIT, &preq->state));
-	set_bit(PLOOP_REQ_ISSUE_FLUSH, &preq->state);
-	top_delta->io.ops->submit(&top_delta->io, preq, preq->req_rw,
-				  &sbl, preq->iblock, 1<<plo->cluster_log);
+	if (++plo->grow_relocated > plo->grow_end - plo->grow_start) {
+		requeue_req(preq, PLOOP_E_COMPLETE);
+		return;
+	}
+
+	del_lockout(preq);
+	preq->req_cluster++;
+	requeue_req(preq, PLOOP_E_ENTRY);
 }
 
 static void map_wb_complete(struct map_node * m, int err)
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index b8c480a..8abc6f9 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -61,6 +61,7 @@  enum {
 				   (for minor mgmt only) */
 	PLOOP_S_ONCE,	        /* An event (e.g. printk once) happened */
 	PLOOP_S_PUSH_BACKUP,	/* Push_backup is in progress */
+	PLOOP_S_NULLIFY,	/* Nullifying BAT is in progress */
 };
 
 enum {
@@ -486,6 +487,7 @@  enum
 	PLOOP_REQ_MERGE,
 	PLOOP_REQ_RELOC_A,	/* 'A' stands for allocate() */
 	PLOOP_REQ_RELOC_S,	/* 'S' stands for submit() */
+	PLOOP_REQ_RELOC_N,	/* 'N' stands for "nullify" */
 	PLOOP_REQ_ZERO,
 	PLOOP_REQ_DISCARD,
 	PLOOP_REQ_RSYNC,
@@ -500,6 +502,7 @@  enum
 #define PLOOP_REQ_MERGE_FL (1 << PLOOP_REQ_MERGE)
 #define PLOOP_REQ_RELOC_A_FL (1 << PLOOP_REQ_RELOC_A)
 #define PLOOP_REQ_RELOC_S_FL (1 << PLOOP_REQ_RELOC_S)
+#define PLOOP_REQ_RELOC_N_FL (1 << PLOOP_REQ_RELOC_N)
 #define PLOOP_REQ_DISCARD_FL (1 << PLOOP_REQ_DISCARD)
 #define PLOOP_REQ_ZERO_FL (1 << PLOOP_REQ_ZERO)
 
@@ -514,7 +517,7 @@  enum
 	PLOOP_E_DELTA_COPIED,	/* Data from previos delta was bcopy-ied */
 	PLOOP_E_TRANS_DELTA_READ,/* Write request reads data from trans delta */
 	PLOOP_E_RELOC_DATA_READ,/* Read user data to relocate */
-	PLOOP_E_RELOC_NULLIFY,  /* Zeroing relocated block is in progress */
+	PLOOP_E_RELOC_NULLIFY,  /* Zeroing the given iblock is in progress */
 	PLOOP_E_INDEX_DELAY,	/* Index update is blocked by already queued
 				 * index update.
 				 */