[Devel,rh7] ploop: add support for dm-crypted ploops

Submitted by Andrey Ryabinin on Aug. 15, 2016, 5:27 p.m.

Details

Message ID 1471282031-9139-1-git-send-email-aryabinin@virtuozzo.com
State New
Series "ploop: add support for dm-crypted ploops"
Headers show

Commit Message

Andrey Ryabinin Aug. 15, 2016, 5:27 p.m.
On dm-crypted ploop fs is mounted not on ploop but on dm-crypt device.
Thus freeze/thaw used by some ploop's ioctl doesn't freeze/thaw filesystem.
To fix that, we store pointer to dm-crypt block device inside ploop_device
struct, and use it to freeze/thaw filesystem.

https://jira.sw.ru/browse/PSBM-50858

Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---
 drivers/block/ploop/dev.c       | 27 +++++++++++++++++++++++++--
 drivers/block/ploop/io_direct.c |  9 +++++++++
 drivers/md/dm-crypt.c           |  8 +++++++-
 drivers/md/dm.c                 |  6 ++++++
 drivers/md/dm.h                 |  2 ++
 include/linux/ploop/ploop.h     | 38 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 87 insertions(+), 3 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 83b0e32..acc120b 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -3318,13 +3318,22 @@  void ploop_relax(struct ploop_device * plo)
 }
 
 /* search disk for first partition bdev with mounted fs and freeze it */
-static struct super_block *find_and_freeze_bdev(struct gendisk *disk,
+static struct super_block *find_and_freeze_bdev(struct ploop_device *plo,
 						struct block_device ** bdev_pp)
 {
 	struct super_block  * sb   = NULL;
 	struct block_device * bdev = NULL;
+	struct gendisk *disk = plo->disk;
 	int i;
 
+	bdev = ploop_get_dm_crypt_bdev(plo);
+	if (bdev) {
+		sb = freeze_bdev(bdev);
+		bdput(bdev);
+		*bdev_pp = bdev;
+		return sb;
+	}
+
 	for (i = 0; i <= (*bdev_pp)->bd_part_count; i++) {
 		bdev = bdget_disk(disk, i);
 		if (!bdev)
@@ -3398,7 +3407,7 @@  static int ploop_snapshot(struct ploop_device * plo, unsigned long arg,
 		/* freeze_bdev() may trigger ploop_bd_full() */
 		plo->maintenance_type = PLOOP_MNTN_SNAPSHOT;
 		mutex_unlock(&plo->ctl_mutex);
-		sb = find_and_freeze_bdev(plo->disk, &bdev);
+		sb = find_and_freeze_bdev(plo, &bdev);
 		mutex_lock(&plo->ctl_mutex);
 		plo->maintenance_type = PLOOP_MNTN_OFF;
 		if (IS_ERR(sb)) {
@@ -4916,6 +4925,7 @@  static int ploop_push_backup_stop(struct ploop_device *plo, unsigned long arg)
 static int ploop_freeze(struct ploop_device *plo, struct block_device *bdev)
 {
 	struct super_block *sb = plo->sb;
+	struct block_device *dm_crypt_bdev;
 
 	if (!test_bit(PLOOP_S_RUNNING, &plo->state))
 		return -EINVAL;
@@ -4926,7 +4936,12 @@  static int ploop_freeze(struct ploop_device *plo, struct block_device *bdev)
 	if (plo->freeze_state == PLOOP_F_THAWING)
 		return -EBUSY;
 
+	dm_crypt_bdev = ploop_get_dm_crypt_bdev(plo);
+	if (dm_crypt_bdev)
+		bdev = dm_crypt_bdev;
 	sb = freeze_bdev(bdev);
+	ploop_put_dm_crypt_bdev(dm_crypt_bdev);
+
 	if (sb && IS_ERR(sb))
 		return PTR_ERR(sb);
 
@@ -4938,6 +4953,7 @@  static int ploop_freeze(struct ploop_device *plo, struct block_device *bdev)
 static int ploop_thaw(struct ploop_device *plo, struct block_device *bdev)
 {
 	struct super_block *sb = plo->sb;
+	struct block_device *dm_crypt_bdev;
 	int err;
 
 	if (!test_bit(PLOOP_S_RUNNING, &plo->state))
@@ -4952,8 +4968,15 @@  static int ploop_thaw(struct ploop_device *plo, struct block_device *bdev)
 	plo->sb = NULL;
 	plo->freeze_state = PLOOP_F_THAWING;
 
+	dm_crypt_bdev = __ploop_get_dm_crypt_bdev(plo);
+	if (dm_crypt_bdev)
+		bdev = dm_crypt_bdev;
+
 	mutex_unlock(&plo->ctl_mutex);
+
 	err = thaw_bdev(bdev, sb);
+	ploop_put_dm_crypt_bdev(dm_crypt_bdev);
+
 	mutex_lock(&plo->ctl_mutex);
 
 	BUG_ON(plo->freeze_state != PLOOP_F_THAWING);
diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
index c12e3c8..0b1b0f6 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -871,13 +871,22 @@  static int dio_invalidate_cache(struct address_space * mapping,
 retry:
 	err = invalidate_inode_pages2(mapping);
 	if (err) {
+		struct ploop_device *plo = bdev->bd_disk->private_data;
+		struct block_device *dm_crypt_bdev;
+
 		printk("PLOOP: failed to invalidate page cache %d/%d\n", err, attempt2);
 		if (attempt2)
 			return err;
 		attempt2 = 1;
 
 		mutex_unlock(&mapping->host->i_mutex);
+
+		dm_crypt_bdev = ploop_get_dm_crypt_bdev(plo);
+		if (dm_crypt_bdev)
+			bdev = dm_crypt_bdev;
 		thaw_bdev(bdev, freeze_bdev(bdev));
+		ploop_put_dm_crypt_bdev(dm_crypt_bdev);
+
 		mutex_lock(&mapping->host->i_mutex);
 		goto retry;
 	}
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 56ca046..bcdd794 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -35,6 +35,8 @@ 
 
 #define DM_MSG_PREFIX "crypt"
 
+#include <linux/ploop/ploop.h>
+#include "dm.h"
 /*
  * context holding the current state of a multi-part conversion
  */
@@ -1645,8 +1647,10 @@  static void crypt_dtr(struct dm_target *ti)
 	if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
 		cc->iv_gen_ops->dtr(cc);
 
-	if (cc->dev)
+	if (cc->dev) {
+		ploop_set_dm_crypt_bdev(cc->dev->bdev, NULL);
 		dm_put_device(ti, cc->dev);
+	}
 
 	kzfree(cc->cipher);
 	kzfree(cc->cipher_string);
@@ -1915,6 +1919,8 @@  static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
+	ploop_set_dm_crypt_bdev(cc->dev->bdev, dm_md_get_bdev(dm_table_get_md(ti->table)));
+
 	if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) {
 		ti->error = "Invalid device sector";
 		goto bad;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8732079..d9a24c9 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -310,6 +310,12 @@  unsigned dm_get_reserved_rq_based_ios(void)
 }
 EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
 
+struct block_device *dm_md_get_bdev(struct mapped_device *md)
+{
+	return md->bdev;
+}
+EXPORT_SYMBOL_GPL(dm_md_get_bdev);
+
 static int __init local_init(void)
 {
 	int r = -ENOMEM;
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 6123c2b..815afa5 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -79,6 +79,8 @@  struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
 
 int dm_queue_merge_is_compulsory(struct request_queue *q);
 
+struct block_device *dm_md_get_bdev(struct mapped_device *md);
+
 void dm_lock_md_type(struct mapped_device *md);
 void dm_unlock_md_type(struct mapped_device *md);
 void dm_set_md_type(struct mapped_device *md, unsigned type);
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index b2ef6bd..bd801f3 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -468,6 +468,7 @@  struct ploop_device
 
 	struct ploop_freeblks_desc *fbd;
 	struct ploop_pushbackup_desc *pbd;
+	struct block_device *dm_crypt_bdev;
 
 	unsigned long		locking_state; /* plo locked by userspace */
 };
@@ -634,6 +635,43 @@  static inline int ploop_req_delay_fua_possible(struct ploop_request *preq)
 	return preq->eng_state == PLOOP_E_DATA_WBI;
 }
 
+static inline void ploop_set_dm_crypt_bdev(struct block_device *ploop_bdev,
+				struct block_device *bdev)
+{
+	if (MAJOR(ploop_bdev->bd_dev) == PLOOP_DEVICE_MAJOR) {
+		struct ploop_device *plo = ploop_bdev->bd_disk->private_data;
+		mutex_lock(&plo->ctl_mutex);
+		plo->dm_crypt_bdev = bdev;
+		mutex_unlock(&plo->ctl_mutex);
+	}
+}
+
+static inline struct block_device *__ploop_get_dm_crypt_bdev(
+	struct ploop_device *plo)
+{
+	if (plo->dm_crypt_bdev)
+		bdgrab(plo->dm_crypt_bdev);
+
+	return plo->dm_crypt_bdev;
+}
+
+static inline struct block_device *ploop_get_dm_crypt_bdev(
+	struct ploop_device *plo)
+{
+	struct block_device *ret;
+
+	mutex_lock(&plo->ctl_mutex);
+	ret = __ploop_get_dm_crypt_bdev(plo);
+	mutex_unlock(&plo->ctl_mutex);
+	return ret;
+}
+
+static inline void ploop_put_dm_crypt_bdev(struct block_device *bdev)
+{
+	if (bdev)
+		bdput(bdev);
+}
+
 static inline void ploop_req_set_error(struct ploop_request * preq, int err)
 {
 	if (!preq->error) {

Comments

Maxim Patlasov Aug. 18, 2016, 1:21 a.m.
Andrey,


ploop_freeze() must use __ploop_get_dm_crypt_bdev(), not 
ploop_get_dm_crypt_bdev()!


Another problem is that the patch does not ensure that 
plo->dm_crypt_bdev stays the same between PLOOP_IOC_FREEZE/THAW. I could 
easily get:

[  636.600190] BUG: unable to handle kernel NULL pointer dereference at 
0000000000000068
[  636.600934] IP: [<ffffffff810ac5d3>] up_read+0x13/0x30
[  636.601446] PGD 222f5a067 PUD 224bc6067 PMD 0
[  636.601915] Oops: 0002 [#1] SMP
...

[  636.623264] Call Trace:
[  636.623597]  [<ffffffff811fd346>] drop_super+0x16/0x30
[  636.624104]  [<ffffffff81237010>] freeze_bdev+0x60/0xe0
[  636.624627]  [<ffffffffa0541a3e>] ploop_ioctl+0x2b8e/0x2c80 [ploop]
[  636.625237]  [<ffffffff811acb44>] ? handle_mm_fault+0x5b4/0xf50
[  636.625822]  [<ffffffff812d41ef>] blkdev_ioctl+0x2df/0x770
[  636.626439]  [<ffffffff812365d1>] block_ioctl+0x41/0x50
[  636.627162]  [<ffffffff8120d515>] do_vfs_ioctl+0x255/0x4f0
[  636.627827]  [<ffffffff8120d804>] SyS_ioctl+0x54/0xa0

due to this problem. Similar might exist in ploop_snapshot(): you bdput 
plo->dm_crypt_bdev in find_and_freeze_bdev(), so it can disappear by the 
time we need it for thaw_bdev after calling complete_snapshot.


Btw, it's usually good idea to give a patch some simple testing prior 
sending it for review ;)


Thanks,

Maxim



On 08/15/2016 10:27 AM, Andrey Ryabinin wrote:
> On dm-crypted ploop fs is mounted not on ploop but on dm-crypt device.
> Thus freeze/thaw used by some ploop's ioctl doesn't freeze/thaw filesystem.
> To fix that, we store pointer to dm-crypt block device inside ploop_device
> struct, and use it to freeze/thaw filesystem.
>
> https://jira.sw.ru/browse/PSBM-50858
>
> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> ---
>   drivers/block/ploop/dev.c       | 27 +++++++++++++++++++++++++--
>   drivers/block/ploop/io_direct.c |  9 +++++++++
>   drivers/md/dm-crypt.c           |  8 +++++++-
>   drivers/md/dm.c                 |  6 ++++++
>   drivers/md/dm.h                 |  2 ++
>   include/linux/ploop/ploop.h     | 38 ++++++++++++++++++++++++++++++++++++++
>   6 files changed, 87 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index 83b0e32..acc120b 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -3318,13 +3318,22 @@ void ploop_relax(struct ploop_device * plo)
>   }
>   
>   /* search disk for first partition bdev with mounted fs and freeze it */
> -static struct super_block *find_and_freeze_bdev(struct gendisk *disk,
> +static struct super_block *find_and_freeze_bdev(struct ploop_device *plo,
>   						struct block_device ** bdev_pp)
>   {
>   	struct super_block  * sb   = NULL;
>   	struct block_device * bdev = NULL;
> +	struct gendisk *disk = plo->disk;
>   	int i;
>   
> +	bdev = ploop_get_dm_crypt_bdev(plo);
> +	if (bdev) {
> +		sb = freeze_bdev(bdev);
> +		bdput(bdev);
> +		*bdev_pp = bdev;
> +		return sb;
> +	}
> +
>   	for (i = 0; i <= (*bdev_pp)->bd_part_count; i++) {
>   		bdev = bdget_disk(disk, i);
>   		if (!bdev)
> @@ -3398,7 +3407,7 @@ static int ploop_snapshot(struct ploop_device * plo, unsigned long arg,
>   		/* freeze_bdev() may trigger ploop_bd_full() */
>   		plo->maintenance_type = PLOOP_MNTN_SNAPSHOT;
>   		mutex_unlock(&plo->ctl_mutex);
> -		sb = find_and_freeze_bdev(plo->disk, &bdev);
> +		sb = find_and_freeze_bdev(plo, &bdev);
>   		mutex_lock(&plo->ctl_mutex);
>   		plo->maintenance_type = PLOOP_MNTN_OFF;
>   		if (IS_ERR(sb)) {
> @@ -4916,6 +4925,7 @@ static int ploop_push_backup_stop(struct ploop_device *plo, unsigned long arg)
>   static int ploop_freeze(struct ploop_device *plo, struct block_device *bdev)
>   {
>   	struct super_block *sb = plo->sb;
> +	struct block_device *dm_crypt_bdev;
>   
>   	if (!test_bit(PLOOP_S_RUNNING, &plo->state))
>   		return -EINVAL;
> @@ -4926,7 +4936,12 @@ static int ploop_freeze(struct ploop_device *plo, struct block_device *bdev)
>   	if (plo->freeze_state == PLOOP_F_THAWING)
>   		return -EBUSY;
>   
> +	dm_crypt_bdev = ploop_get_dm_crypt_bdev(plo);
> +	if (dm_crypt_bdev)
> +		bdev = dm_crypt_bdev;
>   	sb = freeze_bdev(bdev);
> +	ploop_put_dm_crypt_bdev(dm_crypt_bdev);
> +
>   	if (sb && IS_ERR(sb))
>   		return PTR_ERR(sb);
>   
> @@ -4938,6 +4953,7 @@ static int ploop_freeze(struct ploop_device *plo, struct block_device *bdev)
>   static int ploop_thaw(struct ploop_device *plo, struct block_device *bdev)
>   {
>   	struct super_block *sb = plo->sb;
> +	struct block_device *dm_crypt_bdev;
>   	int err;
>   
>   	if (!test_bit(PLOOP_S_RUNNING, &plo->state))
> @@ -4952,8 +4968,15 @@ static int ploop_thaw(struct ploop_device *plo, struct block_device *bdev)
>   	plo->sb = NULL;
>   	plo->freeze_state = PLOOP_F_THAWING;
>   
> +	dm_crypt_bdev = __ploop_get_dm_crypt_bdev(plo);
> +	if (dm_crypt_bdev)
> +		bdev = dm_crypt_bdev;
> +
>   	mutex_unlock(&plo->ctl_mutex);
> +
>   	err = thaw_bdev(bdev, sb);
> +	ploop_put_dm_crypt_bdev(dm_crypt_bdev);
> +
>   	mutex_lock(&plo->ctl_mutex);
>   
>   	BUG_ON(plo->freeze_state != PLOOP_F_THAWING);
> diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
> index c12e3c8..0b1b0f6 100644
> --- a/drivers/block/ploop/io_direct.c
> +++ b/drivers/block/ploop/io_direct.c
> @@ -871,13 +871,22 @@ static int dio_invalidate_cache(struct address_space * mapping,
>   retry:
>   	err = invalidate_inode_pages2(mapping);
>   	if (err) {
> +		struct ploop_device *plo = bdev->bd_disk->private_data;
> +		struct block_device *dm_crypt_bdev;
> +
>   		printk("PLOOP: failed to invalidate page cache %d/%d\n", err, attempt2);
>   		if (attempt2)
>   			return err;
>   		attempt2 = 1;
>   
>   		mutex_unlock(&mapping->host->i_mutex);
> +
> +		dm_crypt_bdev = ploop_get_dm_crypt_bdev(plo);
> +		if (dm_crypt_bdev)
> +			bdev = dm_crypt_bdev;
>   		thaw_bdev(bdev, freeze_bdev(bdev));
> +		ploop_put_dm_crypt_bdev(dm_crypt_bdev);
> +
>   		mutex_lock(&mapping->host->i_mutex);
>   		goto retry;
>   	}
> diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
> index 56ca046..bcdd794 100644
> --- a/drivers/md/dm-crypt.c
> +++ b/drivers/md/dm-crypt.c
> @@ -35,6 +35,8 @@
>   
>   #define DM_MSG_PREFIX "crypt"
>   
> +#include <linux/ploop/ploop.h>
> +#include "dm.h"
>   /*
>    * context holding the current state of a multi-part conversion
>    */
> @@ -1645,8 +1647,10 @@ static void crypt_dtr(struct dm_target *ti)
>   	if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
>   		cc->iv_gen_ops->dtr(cc);
>   
> -	if (cc->dev)
> +	if (cc->dev) {
> +		ploop_set_dm_crypt_bdev(cc->dev->bdev, NULL);
>   		dm_put_device(ti, cc->dev);
> +	}
>   
>   	kzfree(cc->cipher);
>   	kzfree(cc->cipher_string);
> @@ -1915,6 +1919,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>   		goto bad;
>   	}
>   
> +	ploop_set_dm_crypt_bdev(cc->dev->bdev, dm_md_get_bdev(dm_table_get_md(ti->table)));
> +
>   	if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) {
>   		ti->error = "Invalid device sector";
>   		goto bad;
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index 8732079..d9a24c9 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -310,6 +310,12 @@ unsigned dm_get_reserved_rq_based_ios(void)
>   }
>   EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
>   
> +struct block_device *dm_md_get_bdev(struct mapped_device *md)
> +{
> +	return md->bdev;
> +}
> +EXPORT_SYMBOL_GPL(dm_md_get_bdev);
> +
>   static int __init local_init(void)
>   {
>   	int r = -ENOMEM;
> diff --git a/drivers/md/dm.h b/drivers/md/dm.h
> index 6123c2b..815afa5 100644
> --- a/drivers/md/dm.h
> +++ b/drivers/md/dm.h
> @@ -79,6 +79,8 @@ struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
>   
>   int dm_queue_merge_is_compulsory(struct request_queue *q);
>   
> +struct block_device *dm_md_get_bdev(struct mapped_device *md);
> +
>   void dm_lock_md_type(struct mapped_device *md);
>   void dm_unlock_md_type(struct mapped_device *md);
>   void dm_set_md_type(struct mapped_device *md, unsigned type);
> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
> index b2ef6bd..bd801f3 100644
> --- a/include/linux/ploop/ploop.h
> +++ b/include/linux/ploop/ploop.h
> @@ -468,6 +468,7 @@ struct ploop_device
>   
>   	struct ploop_freeblks_desc *fbd;
>   	struct ploop_pushbackup_desc *pbd;
> +	struct block_device *dm_crypt_bdev;
>   
>   	unsigned long		locking_state; /* plo locked by userspace */
>   };
> @@ -634,6 +635,43 @@ static inline int ploop_req_delay_fua_possible(struct ploop_request *preq)
>   	return preq->eng_state == PLOOP_E_DATA_WBI;
>   }
>   
> +static inline void ploop_set_dm_crypt_bdev(struct block_device *ploop_bdev,
> +				struct block_device *bdev)
> +{
> +	if (MAJOR(ploop_bdev->bd_dev) == PLOOP_DEVICE_MAJOR) {
> +		struct ploop_device *plo = ploop_bdev->bd_disk->private_data;
> +		mutex_lock(&plo->ctl_mutex);
> +		plo->dm_crypt_bdev = bdev;
> +		mutex_unlock(&plo->ctl_mutex);
> +	}
> +}
> +
> +static inline struct block_device *__ploop_get_dm_crypt_bdev(
> +	struct ploop_device *plo)
> +{
> +	if (plo->dm_crypt_bdev)
> +		bdgrab(plo->dm_crypt_bdev);
> +
> +	return plo->dm_crypt_bdev;
> +}
> +
> +static inline struct block_device *ploop_get_dm_crypt_bdev(
> +	struct ploop_device *plo)
> +{
> +	struct block_device *ret;
> +
> +	mutex_lock(&plo->ctl_mutex);
> +	ret = __ploop_get_dm_crypt_bdev(plo);
> +	mutex_unlock(&plo->ctl_mutex);
> +	return ret;
> +}
> +
> +static inline void ploop_put_dm_crypt_bdev(struct block_device *bdev)
> +{
> +	if (bdev)
> +		bdput(bdev);
> +}
> +
>   static inline void ploop_req_set_error(struct ploop_request * preq, int err)
>   {
>   	if (!preq->error) {
Andrey Ryabinin Aug. 18, 2016, 4:49 p.m.
On 08/18/2016 04:21 AM, Maxim Patlasov wrote:
> Andrey,
> 
> 
> ploop_freeze() must use __ploop_get_dm_crypt_bdev(), not ploop_get_dm_crypt_bdev()!
> 

YUp.

> 
> Another problem is that the patch does not ensure that plo->dm_crypt_bdev stays the same between PLOOP_IOC_FREEZE/THAW. I could easily get:
> 
> [  636.600190] BUG: unable to handle kernel NULL pointer dereference at 0000000000000068
> [  636.600934] IP: [<ffffffff810ac5d3>] up_read+0x13/0x30
> [  636.601446] PGD 222f5a067 PUD 224bc6067 PMD 0
> [  636.601915] Oops: 0002 [#1] SMP
> ...
> 
> [  636.623264] Call Trace:
> [  636.623597]  [<ffffffff811fd346>] drop_super+0x16/0x30
> [  636.624104]  [<ffffffff81237010>] freeze_bdev+0x60/0xe0
> [  636.624627]  [<ffffffffa0541a3e>] ploop_ioctl+0x2b8e/0x2c80 [ploop]
> [  636.625237]  [<ffffffff811acb44>] ? handle_mm_fault+0x5b4/0xf50
> [  636.625822]  [<ffffffff812d41ef>] blkdev_ioctl+0x2df/0x770
> [  636.626439]  [<ffffffff812365d1>] block_ioctl+0x41/0x50
> [  636.627162]  [<ffffffff8120d515>] do_vfs_ioctl+0x255/0x4f0
> [  636.627827]  [<ffffffff8120d804>] SyS_ioctl+0x54/0xa0
> 
> due to this problem. 

I don't see direct relation between this crash and changing plo->dm_crypt_bdev in between freeze/thaw.

The way I read this

struct super_block *freeze_bdev(struct block_device *bdev)
...
	if (++bdev->bd_fsfreeze_count > 1) {

		sb = get_super(bdev);  // this returned NULL, so we don't have active super block on this device.
		drop_super(sb); // NULL ptr deref
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return sb;
	

AFAIU, this can happen if we call freeze_bdev() twice on block device which doesn't have mounted fs.
Isn't this a bug in freeze_bdev()?
And how did you get this crash?

> Similar might exist in ploop_snapshot(): you bdput plo->dm_crypt_bdev in find_and_freeze_bdev(), so it can disappear by the time we need it for thaw_bdev after calling complete_snapshot.
> 
> 
> Btw, it's usually good idea to give a patch some simple testing prior sending it for review ;)
> 

http://devopsreactions.tumblr.com/post/88260308392/testing-my-own-code

> 
> Thanks,
> 
> Maxim
> 
> 
>
Maxim Patlasov Aug. 18, 2016, 11:40 p.m.
On 08/18/2016 09:49 AM, Andrey Ryabinin wrote:

>
> On 08/18/2016 04:21 AM, Maxim Patlasov wrote:
>> Andrey,
>>
>>
>> ploop_freeze() must use __ploop_get_dm_crypt_bdev(), not ploop_get_dm_crypt_bdev()!
>>
> YUp.
>
>> Another problem is that the patch does not ensure that plo->dm_crypt_bdev stays the same between PLOOP_IOC_FREEZE/THAW. I could easily get:
>>
>> [  636.600190] BUG: unable to handle kernel NULL pointer dereference at 0000000000000068
>> [  636.600934] IP: [<ffffffff810ac5d3>] up_read+0x13/0x30
>> [  636.601446] PGD 222f5a067 PUD 224bc6067 PMD 0
>> [  636.601915] Oops: 0002 [#1] SMP
>> ...
>>
>> [  636.623264] Call Trace:
>> [  636.623597]  [<ffffffff811fd346>] drop_super+0x16/0x30
>> [  636.624104]  [<ffffffff81237010>] freeze_bdev+0x60/0xe0
>> [  636.624627]  [<ffffffffa0541a3e>] ploop_ioctl+0x2b8e/0x2c80 [ploop]
>> [  636.625237]  [<ffffffff811acb44>] ? handle_mm_fault+0x5b4/0xf50
>> [  636.625822]  [<ffffffff812d41ef>] blkdev_ioctl+0x2df/0x770
>> [  636.626439]  [<ffffffff812365d1>] block_ioctl+0x41/0x50
>> [  636.627162]  [<ffffffff8120d515>] do_vfs_ioctl+0x255/0x4f0
>> [  636.627827]  [<ffffffff8120d804>] SyS_ioctl+0x54/0xa0
>>
>> due to this problem.
> I don't see direct relation between this crash and changing plo->dm_crypt_bdev in between freeze/thaw.

In ideal world freeze+thaw+freeze+thaw must change bd_fsfreeze_count 
like this: 0 --> 1 --> 0 --> 1 --> 0. But if plo->dm_crypt_bdev changes 
in between, the first increment may be applied to one bdev, then 
decrement to another bdev, then the second increment again to the first 
bdev.

>
> The way I read this
>
> struct super_block *freeze_bdev(struct block_device *bdev)
> ...
> 	if (++bdev->bd_fsfreeze_count > 1) {
>
> 		sb = get_super(bdev);  // this returned NULL, so we don't have active super block on this device.
> 		drop_super(sb); // NULL ptr deref
> 		mutex_unlock(&bdev->bd_fsfreeze_mutex);
> 		return sb;
> 	
>
> AFAIU, this can happen if we call freeze_bdev() twice on block device which doesn't have mounted fs.
> Isn't this a bug in freeze_bdev()?

No. The bug is to call freeze_bdev second time if the first time it 
returned sb == NULL.

> And how did you get this crash?

1) freeze (increments ploop bdev counter)
2) cryptsetup luksOpen
3) thaw (do nothing)
4) cryptsetup luksClose
5) freeze (attempts to increment ploop bdev counter again)

>
>> Similar might exist in ploop_snapshot(): you bdput plo->dm_crypt_bdev in find_and_freeze_bdev(), so it can disappear by the time we need it for thaw_bdev after calling complete_snapshot.
>>
>>
>> Btw, it's usually good idea to give a patch some simple testing prior sending it for review ;)
>>
> http://devopsreactions.tumblr.com/post/88260308392/testing-my-own-code

:)

>
>> Thanks,
>>
>> Maxim
>>
>>
>>
Andrey Ryabinin Aug. 19, 2016, 10:44 a.m.
On 08/19/2016 02:40 AM, Maxim Patlasov wrote:
> On 08/18/2016 09:49 AM, Andrey Ryabinin wrote:
> 
>>
>> On 08/18/2016 04:21 AM, Maxim Patlasov wrote:
>>> Andrey,
>>>
>>>
>>> ploop_freeze() must use __ploop_get_dm_crypt_bdev(), not ploop_get_dm_crypt_bdev()!
>>>
>> YUp.
>>
>>> Another problem is that the patch does not ensure that plo->dm_crypt_bdev stays the same between PLOOP_IOC_FREEZE/THAW. I could easily get:
>>>
>>> [  636.600190] BUG: unable to handle kernel NULL pointer dereference at 0000000000000068
>>> [  636.600934] IP: [<ffffffff810ac5d3>] up_read+0x13/0x30
>>> [  636.601446] PGD 222f5a067 PUD 224bc6067 PMD 0
>>> [  636.601915] Oops: 0002 [#1] SMP
>>> ...
>>>
>>> [  636.623264] Call Trace:
>>> [  636.623597]  [<ffffffff811fd346>] drop_super+0x16/0x30
>>> [  636.624104]  [<ffffffff81237010>] freeze_bdev+0x60/0xe0
>>> [  636.624627]  [<ffffffffa0541a3e>] ploop_ioctl+0x2b8e/0x2c80 [ploop]
>>> [  636.625237]  [<ffffffff811acb44>] ? handle_mm_fault+0x5b4/0xf50
>>> [  636.625822]  [<ffffffff812d41ef>] blkdev_ioctl+0x2df/0x770
>>> [  636.626439]  [<ffffffff812365d1>] block_ioctl+0x41/0x50
>>> [  636.627162]  [<ffffffff8120d515>] do_vfs_ioctl+0x255/0x4f0
>>> [  636.627827]  [<ffffffff8120d804>] SyS_ioctl+0x54/0xa0
>>>
>>> due to this problem.
>> I don't see direct relation between this crash and changing plo->dm_crypt_bdev in between freeze/thaw.
> 
> In ideal world freeze+thaw+freeze+thaw must change bd_fsfreeze_count like this: 0 --> 1 --> 0 --> 1 --> 0. But if plo->dm_crypt_bdev changes in between, the first increment may be applied to one bdev, then decrement to another bdev, then the second increment again to the first bdev.
> 
>>
>> The way I read this
>>
>> struct super_block *freeze_bdev(struct block_device *bdev)
>> ...
>>     if (++bdev->bd_fsfreeze_count > 1) {
>>
>>         sb = get_super(bdev);  // this returned NULL, so we don't have active super block on this device.
>>         drop_super(sb); // NULL ptr deref
>>         mutex_unlock(&bdev->bd_fsfreeze_mutex);
>>         return sb;
>>     
>>
>> AFAIU, this can happen if we call freeze_bdev() twice on block device which doesn't have mounted fs.
>> Isn't this a bug in freeze_bdev()?
> 
> No. The bug is to call freeze_bdev second time if the first time it returned sb == NULL.
> 

Disagreed. This would mean that all callers supposed to synchronize freeze_bdev()/thaw_bdev() sequence.
Otherwise we can get:

CPU 0:					CPU1:
	freeze_bdev() //return NULL
					freeze_bdev() //NULL-ptr deref
					thaw_bdev()
	thaw_bdev()

So, how do you propose to fix that case? If freeze_bdev() on CPU1 is illegal, that would mean that every freeze_bdev()...thaw_bdev() should
be in critical section, iow surrounded by some per-bdev lock. While comment near freeze_bdev() says that it totally ok if multiple freeze
request arrive simultaneously.

Note, that this patch is also still affected by this bug, e.g.:
	1) cryptsetup open /dev/ploop0p1 test
	2) freeze ploop
	3) dmsetup suspend test //suspend calls lock_fs()->freeze_bdev() -> NULL ptr deref


So the only sane way to fix this is to add NULL check before drop_super().
Maxim Patlasov Aug. 19, 2016, 5:14 p.m.
Andrey,


On 08/19/2016 03:44 AM, Andrey Ryabinin wrote:

>
> On 08/19/2016 02:40 AM, Maxim Patlasov wrote:
>> On 08/18/2016 09:49 AM, Andrey Ryabinin wrote:
>>
>>> On 08/18/2016 04:21 AM, Maxim Patlasov wrote:
>>>> Andrey,
>>>>
>>>>
>>>> ploop_freeze() must use __ploop_get_dm_crypt_bdev(), not ploop_get_dm_crypt_bdev()!
>>>>
>>> YUp.
>>>
>>>> Another problem is that the patch does not ensure that plo->dm_crypt_bdev stays the same between PLOOP_IOC_FREEZE/THAW. I could easily get:
>>>>
>>>> [  636.600190] BUG: unable to handle kernel NULL pointer dereference at 0000000000000068
>>>> [  636.600934] IP: [<ffffffff810ac5d3>] up_read+0x13/0x30
>>>> [  636.601446] PGD 222f5a067 PUD 224bc6067 PMD 0
>>>> [  636.601915] Oops: 0002 [#1] SMP
>>>> ...
>>>>
>>>> [  636.623264] Call Trace:
>>>> [  636.623597]  [<ffffffff811fd346>] drop_super+0x16/0x30
>>>> [  636.624104]  [<ffffffff81237010>] freeze_bdev+0x60/0xe0
>>>> [  636.624627]  [<ffffffffa0541a3e>] ploop_ioctl+0x2b8e/0x2c80 [ploop]
>>>> [  636.625237]  [<ffffffff811acb44>] ? handle_mm_fault+0x5b4/0xf50
>>>> [  636.625822]  [<ffffffff812d41ef>] blkdev_ioctl+0x2df/0x770
>>>> [  636.626439]  [<ffffffff812365d1>] block_ioctl+0x41/0x50
>>>> [  636.627162]  [<ffffffff8120d515>] do_vfs_ioctl+0x255/0x4f0
>>>> [  636.627827]  [<ffffffff8120d804>] SyS_ioctl+0x54/0xa0
>>>>
>>>> due to this problem.
>>> I don't see direct relation between this crash and changing plo->dm_crypt_bdev in between freeze/thaw.
>> In ideal world freeze+thaw+freeze+thaw must change bd_fsfreeze_count like this: 0 --> 1 --> 0 --> 1 --> 0. But if plo->dm_crypt_bdev changes in between, the first increment may be applied to one bdev, then decrement to another bdev, then the second increment again to the first bdev.
>>
>>> The way I read this
>>>
>>> struct super_block *freeze_bdev(struct block_device *bdev)
>>> ...
>>>      if (++bdev->bd_fsfreeze_count > 1) {
>>>
>>>          sb = get_super(bdev);  // this returned NULL, so we don't have active super block on this device.
>>>          drop_super(sb); // NULL ptr deref
>>>          mutex_unlock(&bdev->bd_fsfreeze_mutex);
>>>          return sb;
>>>      
>>>
>>> AFAIU, this can happen if we call freeze_bdev() twice on block device which doesn't have mounted fs.
>>> Isn't this a bug in freeze_bdev()?
>> No. The bug is to call freeze_bdev second time if the first time it returned sb == NULL.
>>
> Disagreed. This would mean that all callers supposed to synchronize freeze_bdev()/thaw_bdev() sequence.
> Otherwise we can get:
>
> CPU 0:					CPU1:
> 	freeze_bdev() //return NULL
> 					freeze_bdev() //NULL-ptr deref
> 					thaw_bdev()
> 	thaw_bdev()
>
> So, how do you propose to fix that case? If freeze_bdev() on CPU1 is illegal, that would mean that every freeze_bdev()...thaw_bdev() should
> be in critical section, iow surrounded by some per-bdev lock. While comment near freeze_bdev() says that it totally ok if multiple freeze
> request arrive simultaneously.

Makes sense. Will you send the patch upstream?

>
> Note, that this patch is also still affected by this bug, e.g.:
> 	1) cryptsetup open /dev/ploop0p1 test
> 	2) freeze ploop
> 	3) dmsetup suspend test //suspend calls lock_fs()->freeze_bdev() -> NULL ptr deref
>
>
> So the only sane way to fix this is to add NULL check before drop_super().

"freeze ploop" might "thaw" immediately if !sb. Yeah, this is racy, but 
the chances of such a race are negligibly slim. On the other hand, your 
fix for freeze_bdev() is so simple and straightforward, that I'd vote 
for that (even if the fix is rejected upstream for some reasons).
Thanks,
Maxim