[Devel,rh7] block/ploop: Add resched points in ploop thread.

Submitted by Andrey Ryabinin on May 29, 2017, 9:32 a.m.

Details

Message ID 20170529093207.14503-1-aryabinin@virtuozzo.com
State New
Series "block/ploop: Add resched points in ploop thread."
Headers show

Commit Message

Andrey Ryabinin May 29, 2017, 9:32 a.m.
Ploop thread may not reschedule for a long time and cause softlockup
warnings on a debug kernel:

 NMI watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [ploop56082:13917]
...
 RIP: 0010:[<ffffffff8256f426>]  [<ffffffff8256f426>] _raw_spin_unlock_irqrestore+0x46/0xc0
...
 Call Trace:
  [<ffffffff816194f7>] __slab_free+0x207/0x2f0
  [<ffffffff81619ff1>] kmem_cache_free+0x2d1/0x380
  [<ffffffff81514412>] mempool_free_slab+0x22/0x30
  [<ffffffff815159ae>] mempool_free+0x14e/0x1f0
  [<ffffffff81747d6a>] bio_put+0x17a/0x270
  [<ffffffff81735837>] end_bio_bh_io_sync+0xc7/0x120
  [<ffffffff81747d6a>] bio_put+0x17a/0x270
  [<ffffffff81735837>] end_bio_bh_io_sync+0xc7/0x120
  [<ffffffff8174a5ac>] bio_endio+0x13c/0x220
  [<ffffffffa0858a38>] ploop_complete_request+0x148/0x1c60 [ploop]
  [<ffffffffa086b06d>] ploop_req_state_process+0x1f7d/0x3870 [ploop]
  [<ffffffffa086cfce>] ploop_thread+0x66e/0x1170 [ploop]
  [<ffffffff8121dcb6>] kthread+0x1e6/0x250
  [<ffffffff8258ae58>] ret_from_fork+0x58/0x90

Add cond_resched() on each iteration of the ploop_thread() to fix that.

https://jira.sw.ru/browse/PSBM-66617
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---
 drivers/block/ploop/dev.c     | 22 ++++++++++------------
 drivers/block/ploop/discard.c |  2 +-
 include/linux/ploop/ploop.h   |  3 +--
 3 files changed, 12 insertions(+), 15 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index e7aec54172c0..eab6cb067708 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -230,8 +230,7 @@  static void ploop_test_and_clear_blockable(struct ploop_device *plo,
 }
 
 /* always called with plo->lock released */
-void ploop_preq_drop(struct ploop_device * plo, struct list_head *drop_list,
-		      int keep_locked)
+void ploop_preq_drop(struct ploop_device * plo, struct list_head *drop_list)
 {
 	struct ploop_request * preq;
 	int drop_qlen = 0;
@@ -261,8 +260,7 @@  void ploop_preq_drop(struct ploop_device * plo, struct list_head *drop_list,
 
 	ploop_uncongest(plo);
 
-	if (!keep_locked)
-		spin_unlock_irq(&plo->lock);
+	spin_unlock_irq(&plo->lock);
 }
 
 static void merge_rw_flags_to_req(unsigned long rw,
@@ -1066,7 +1064,7 @@  out:
 	blk_check_plugged(ploop_unplug, plo, sizeof(struct blk_plug_cb));
 
 	if (!list_empty(&drop_list))
-		ploop_preq_drop(plo, &drop_list, 0);
+		ploop_preq_drop(plo, &drop_list);
 
 	return;
 }
@@ -2964,12 +2962,12 @@  static int ploop_thread(void * data)
 	set_user_nice(current, -20);
 
 	blk_start_plug(&plug);
-	spin_lock_irq(&plo->lock);
 	for (;;) {
 		/* Convert bios to preqs early (at least before processing
 		 * entry queue) to increase chances of bio merge
 		 */
-	again:
+		cond_resched();
+		spin_lock_irq(&plo->lock);
 		BUG_ON (!list_empty(&drop_list));
 
 		process_pending_bios(plo, &drop_list);
@@ -2978,8 +2976,8 @@  static int ploop_thread(void * data)
 
 		if (!list_empty(&drop_list)) {
 			spin_unlock_irq(&plo->lock);
-			ploop_preq_drop(plo, &drop_list, 1);
-			goto again;
+			ploop_preq_drop(plo, &drop_list);
+			continue;
 		}
 
 		if (!list_empty(&plo->ready_queue)) {
@@ -2991,7 +2989,6 @@  static int ploop_thread(void * data)
 
 			ploop_req_state_process(preq);
 
-			spin_lock_irq(&plo->lock);
 			continue;
 		}
 
@@ -3010,6 +3007,7 @@  static int ploop_thread(void * data)
 				set_bit(PLOOP_S_ATTENTION, &plo->state);
 				if (plo->active_reqs) {
 					list_add(&preq->list, &plo->entry_queue);
+					spin_unlock_irq(&plo->lock);
 					continue;
 				}
 				plo->barrier_reqs--;
@@ -3042,8 +3040,6 @@  static int ploop_thread(void * data)
 			spin_unlock_irq(&plo->lock);
 
 			ploop_req_state_process(preq);
-
-			spin_lock_irq(&plo->lock);
 			continue;
 		}
 
@@ -3058,8 +3054,10 @@  static int ploop_thread(void * data)
 
 wait_more:
 		ploop_wait(plo, once, &plug);
+		spin_unlock_irq(&plo->lock);
 		once = 0;
 	}
+
 	spin_unlock_irq(&plo->lock);
 	blk_finish_plug(&plug);
 
diff --git a/drivers/block/ploop/discard.c b/drivers/block/ploop/discard.c
index 828ab3696469..33122497b9c5 100644
--- a/drivers/block/ploop/discard.c
+++ b/drivers/block/ploop/discard.c
@@ -66,7 +66,7 @@  int ploop_discard_fini_ioc(struct ploop_device *plo)
 	spin_unlock_irq(&plo->lock);
 
 	if (!list_empty(&drop_list))
-		ploop_preq_drop(plo, &drop_list, 0);
+		ploop_preq_drop(plo, &drop_list);
 
 	if (plo->maintenance_type != PLOOP_MNTN_DISCARD) {
 		ret = -EBUSY;
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index c7261c4bda57..f13b7caa4128 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -633,8 +633,7 @@  static inline struct ploop_delta * map_top_delta(struct ploop_map * map)
 
 void ploop_complete_io_state(struct ploop_request * preq);
 void ploop_fail_request(struct ploop_request * preq, int err);
-void ploop_preq_drop(struct ploop_device * plo, struct list_head *drop_list,
-		      int keep_locked);
+void ploop_preq_drop(struct ploop_device * plo, struct list_head *drop_list);
 
 
 static inline int ploop_req_delay_fua_possible(struct ploop_request *preq)

Comments

Konstantin Khorenko June 5, 2017, 12:45 p.m.
Maxim, please review the patch.

--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 05/29/2017 12:32 PM, Andrey Ryabinin wrote:
> Ploop thread may not reschedule for a long time and cause softlockup
> warnings on a debug kernel:
>
>  NMI watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [ploop56082:13917]
> ...
>  RIP: 0010:[<ffffffff8256f426>]  [<ffffffff8256f426>] _raw_spin_unlock_irqrestore+0x46/0xc0
> ...
>  Call Trace:
>   [<ffffffff816194f7>] __slab_free+0x207/0x2f0
>   [<ffffffff81619ff1>] kmem_cache_free+0x2d1/0x380
>   [<ffffffff81514412>] mempool_free_slab+0x22/0x30
>   [<ffffffff815159ae>] mempool_free+0x14e/0x1f0
>   [<ffffffff81747d6a>] bio_put+0x17a/0x270
>   [<ffffffff81735837>] end_bio_bh_io_sync+0xc7/0x120
>   [<ffffffff81747d6a>] bio_put+0x17a/0x270
>   [<ffffffff81735837>] end_bio_bh_io_sync+0xc7/0x120
>   [<ffffffff8174a5ac>] bio_endio+0x13c/0x220
>   [<ffffffffa0858a38>] ploop_complete_request+0x148/0x1c60 [ploop]
>   [<ffffffffa086b06d>] ploop_req_state_process+0x1f7d/0x3870 [ploop]
>   [<ffffffffa086cfce>] ploop_thread+0x66e/0x1170 [ploop]
>   [<ffffffff8121dcb6>] kthread+0x1e6/0x250
>   [<ffffffff8258ae58>] ret_from_fork+0x58/0x90
>
> Add cond_resched() on each iteration of the ploop_thread() to fix that.
>
> https://jira.sw.ru/browse/PSBM-66617
> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> ---
>  drivers/block/ploop/dev.c     | 22 ++++++++++------------
>  drivers/block/ploop/discard.c |  2 +-
>  include/linux/ploop/ploop.h   |  3 +--
>  3 files changed, 12 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index e7aec54172c0..eab6cb067708 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -230,8 +230,7 @@ static void ploop_test_and_clear_blockable(struct ploop_device *plo,
>  }
>
>  /* always called with plo->lock released */
> -void ploop_preq_drop(struct ploop_device * plo, struct list_head *drop_list,
> -		      int keep_locked)
> +void ploop_preq_drop(struct ploop_device * plo, struct list_head *drop_list)
>  {
>  	struct ploop_request * preq;
>  	int drop_qlen = 0;
> @@ -261,8 +260,7 @@ void ploop_preq_drop(struct ploop_device * plo, struct list_head *drop_list,
>
>  	ploop_uncongest(plo);
>
> -	if (!keep_locked)
> -		spin_unlock_irq(&plo->lock);
> +	spin_unlock_irq(&plo->lock);
>  }
>
>  static void merge_rw_flags_to_req(unsigned long rw,
> @@ -1066,7 +1064,7 @@ out:
>  	blk_check_plugged(ploop_unplug, plo, sizeof(struct blk_plug_cb));
>
>  	if (!list_empty(&drop_list))
> -		ploop_preq_drop(plo, &drop_list, 0);
> +		ploop_preq_drop(plo, &drop_list);
>
>  	return;
>  }
> @@ -2964,12 +2962,12 @@ static int ploop_thread(void * data)
>  	set_user_nice(current, -20);
>
>  	blk_start_plug(&plug);
> -	spin_lock_irq(&plo->lock);
>  	for (;;) {
>  		/* Convert bios to preqs early (at least before processing
>  		 * entry queue) to increase chances of bio merge
>  		 */
> -	again:
> +		cond_resched();
> +		spin_lock_irq(&plo->lock);
>  		BUG_ON (!list_empty(&drop_list));
>
>  		process_pending_bios(plo, &drop_list);
> @@ -2978,8 +2976,8 @@ static int ploop_thread(void * data)
>
>  		if (!list_empty(&drop_list)) {
>  			spin_unlock_irq(&plo->lock);
> -			ploop_preq_drop(plo, &drop_list, 1);
> -			goto again;
> +			ploop_preq_drop(plo, &drop_list);
> +			continue;
>  		}
>
>  		if (!list_empty(&plo->ready_queue)) {
> @@ -2991,7 +2989,6 @@ static int ploop_thread(void * data)
>
>  			ploop_req_state_process(preq);
>
> -			spin_lock_irq(&plo->lock);
>  			continue;
>  		}
>
> @@ -3010,6 +3007,7 @@ static int ploop_thread(void * data)
>  				set_bit(PLOOP_S_ATTENTION, &plo->state);
>  				if (plo->active_reqs) {
>  					list_add(&preq->list, &plo->entry_queue);
> +					spin_unlock_irq(&plo->lock);
>  					continue;
>  				}
>  				plo->barrier_reqs--;
> @@ -3042,8 +3040,6 @@ static int ploop_thread(void * data)
>  			spin_unlock_irq(&plo->lock);
>
>  			ploop_req_state_process(preq);
> -
> -			spin_lock_irq(&plo->lock);
>  			continue;
>  		}
>
> @@ -3058,8 +3054,10 @@ static int ploop_thread(void * data)
>
>  wait_more:
>  		ploop_wait(plo, once, &plug);
> +		spin_unlock_irq(&plo->lock);
>  		once = 0;
>  	}
> +
>  	spin_unlock_irq(&plo->lock);
>  	blk_finish_plug(&plug);
>
> diff --git a/drivers/block/ploop/discard.c b/drivers/block/ploop/discard.c
> index 828ab3696469..33122497b9c5 100644
> --- a/drivers/block/ploop/discard.c
> +++ b/drivers/block/ploop/discard.c
> @@ -66,7 +66,7 @@ int ploop_discard_fini_ioc(struct ploop_device *plo)
>  	spin_unlock_irq(&plo->lock);
>
>  	if (!list_empty(&drop_list))
> -		ploop_preq_drop(plo, &drop_list, 0);
> +		ploop_preq_drop(plo, &drop_list);
>
>  	if (plo->maintenance_type != PLOOP_MNTN_DISCARD) {
>  		ret = -EBUSY;
> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
> index c7261c4bda57..f13b7caa4128 100644
> --- a/include/linux/ploop/ploop.h
> +++ b/include/linux/ploop/ploop.h
> @@ -633,8 +633,7 @@ static inline struct ploop_delta * map_top_delta(struct ploop_map * map)
>
>  void ploop_complete_io_state(struct ploop_request * preq);
>  void ploop_fail_request(struct ploop_request * preq, int err);
> -void ploop_preq_drop(struct ploop_device * plo, struct list_head *drop_list,
> -		      int keep_locked);
> +void ploop_preq_drop(struct ploop_device * plo, struct list_head *drop_list);
>
>
>  static inline int ploop_req_delay_fua_possible(struct ploop_request *preq)
>
Maxim Patlasov June 5, 2017, 11:03 p.m.
Acked-by: Maxim Patlasov <mpatlasov@virtuozzo.com>


On 06/05/2017 05:45 AM, Konstantin Khorenko wrote:
> Maxim, please review the patch.
>
> -- 
> Best regards,
>
> Konstantin Khorenko,
> Virtuozzo Linux Kernel Team
>
> On 05/29/2017 12:32 PM, Andrey Ryabinin wrote:
>> Ploop thread may not reschedule for a long time and cause softlockup
>> warnings on a debug kernel:
>>
>>  NMI watchdog: BUG: soft lockup - CPU#4 stuck for 22s! 
>> [ploop56082:13917]
>> ...
>>  RIP: 0010:[<ffffffff8256f426>] [<ffffffff8256f426>] 
>> _raw_spin_unlock_irqrestore+0x46/0xc0
>> ...
>>  Call Trace:
>>   [<ffffffff816194f7>] __slab_free+0x207/0x2f0
>>   [<ffffffff81619ff1>] kmem_cache_free+0x2d1/0x380
>>   [<ffffffff81514412>] mempool_free_slab+0x22/0x30
>>   [<ffffffff815159ae>] mempool_free+0x14e/0x1f0
>>   [<ffffffff81747d6a>] bio_put+0x17a/0x270
>>   [<ffffffff81735837>] end_bio_bh_io_sync+0xc7/0x120
>>   [<ffffffff81747d6a>] bio_put+0x17a/0x270
>>   [<ffffffff81735837>] end_bio_bh_io_sync+0xc7/0x120
>>   [<ffffffff8174a5ac>] bio_endio+0x13c/0x220
>>   [<ffffffffa0858a38>] ploop_complete_request+0x148/0x1c60 [ploop]
>>   [<ffffffffa086b06d>] ploop_req_state_process+0x1f7d/0x3870 [ploop]
>>   [<ffffffffa086cfce>] ploop_thread+0x66e/0x1170 [ploop]
>>   [<ffffffff8121dcb6>] kthread+0x1e6/0x250
>>   [<ffffffff8258ae58>] ret_from_fork+0x58/0x90
>>
>> Add cond_resched() on each iteration of the ploop_thread() to fix that.
>>
>> https://jira.sw.ru/browse/PSBM-66617
>> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
>> ---
>>  drivers/block/ploop/dev.c     | 22 ++++++++++------------
>>  drivers/block/ploop/discard.c |  2 +-
>>  include/linux/ploop/ploop.h   |  3 +--
>>  3 files changed, 12 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
>> index e7aec54172c0..eab6cb067708 100644
>> --- a/drivers/block/ploop/dev.c
>> +++ b/drivers/block/ploop/dev.c
>> @@ -230,8 +230,7 @@ static void ploop_test_and_clear_blockable(struct 
>> ploop_device *plo,
>>  }
>>
>>  /* always called with plo->lock released */
>> -void ploop_preq_drop(struct ploop_device * plo, struct list_head 
>> *drop_list,
>> -              int keep_locked)
>> +void ploop_preq_drop(struct ploop_device * plo, struct list_head 
>> *drop_list)
>>  {
>>      struct ploop_request * preq;
>>      int drop_qlen = 0;
>> @@ -261,8 +260,7 @@ void ploop_preq_drop(struct ploop_device * plo, 
>> struct list_head *drop_list,
>>
>>      ploop_uncongest(plo);
>>
>> -    if (!keep_locked)
>> -        spin_unlock_irq(&plo->lock);
>> +    spin_unlock_irq(&plo->lock);
>>  }
>>
>>  static void merge_rw_flags_to_req(unsigned long rw,
>> @@ -1066,7 +1064,7 @@ out:
>>      blk_check_plugged(ploop_unplug, plo, sizeof(struct blk_plug_cb));
>>
>>      if (!list_empty(&drop_list))
>> -        ploop_preq_drop(plo, &drop_list, 0);
>> +        ploop_preq_drop(plo, &drop_list);
>>
>>      return;
>>  }
>> @@ -2964,12 +2962,12 @@ static int ploop_thread(void * data)
>>      set_user_nice(current, -20);
>>
>>      blk_start_plug(&plug);
>> -    spin_lock_irq(&plo->lock);
>>      for (;;) {
>>          /* Convert bios to preqs early (at least before processing
>>           * entry queue) to increase chances of bio merge
>>           */
>> -    again:
>> +        cond_resched();
>> +        spin_lock_irq(&plo->lock);
>>          BUG_ON (!list_empty(&drop_list));
>>
>>          process_pending_bios(plo, &drop_list);
>> @@ -2978,8 +2976,8 @@ static int ploop_thread(void * data)
>>
>>          if (!list_empty(&drop_list)) {
>>              spin_unlock_irq(&plo->lock);
>> -            ploop_preq_drop(plo, &drop_list, 1);
>> -            goto again;
>> +            ploop_preq_drop(plo, &drop_list);
>> +            continue;
>>          }
>>
>>          if (!list_empty(&plo->ready_queue)) {
>> @@ -2991,7 +2989,6 @@ static int ploop_thread(void * data)
>>
>>              ploop_req_state_process(preq);
>>
>> -            spin_lock_irq(&plo->lock);
>>              continue;
>>          }
>>
>> @@ -3010,6 +3007,7 @@ static int ploop_thread(void * data)
>>                  set_bit(PLOOP_S_ATTENTION, &plo->state);
>>                  if (plo->active_reqs) {
>>                      list_add(&preq->list, &plo->entry_queue);
>> +                    spin_unlock_irq(&plo->lock);
>>                      continue;
>>                  }
>>                  plo->barrier_reqs--;
>> @@ -3042,8 +3040,6 @@ static int ploop_thread(void * data)
>>              spin_unlock_irq(&plo->lock);
>>
>>              ploop_req_state_process(preq);
>> -
>> -            spin_lock_irq(&plo->lock);
>>              continue;
>>          }
>>
>> @@ -3058,8 +3054,10 @@ static int ploop_thread(void * data)
>>
>>  wait_more:
>>          ploop_wait(plo, once, &plug);
>> +        spin_unlock_irq(&plo->lock);
>>          once = 0;
>>      }
>> +
>>      spin_unlock_irq(&plo->lock);
>>      blk_finish_plug(&plug);
>>
>> diff --git a/drivers/block/ploop/discard.c 
>> b/drivers/block/ploop/discard.c
>> index 828ab3696469..33122497b9c5 100644
>> --- a/drivers/block/ploop/discard.c
>> +++ b/drivers/block/ploop/discard.c
>> @@ -66,7 +66,7 @@ int ploop_discard_fini_ioc(struct ploop_device *plo)
>>      spin_unlock_irq(&plo->lock);
>>
>>      if (!list_empty(&drop_list))
>> -        ploop_preq_drop(plo, &drop_list, 0);
>> +        ploop_preq_drop(plo, &drop_list);
>>
>>      if (plo->maintenance_type != PLOOP_MNTN_DISCARD) {
>>          ret = -EBUSY;
>> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
>> index c7261c4bda57..f13b7caa4128 100644
>> --- a/include/linux/ploop/ploop.h
>> +++ b/include/linux/ploop/ploop.h
>> @@ -633,8 +633,7 @@ static inline struct ploop_delta * 
>> map_top_delta(struct ploop_map * map)
>>
>>  void ploop_complete_io_state(struct ploop_request * preq);
>>  void ploop_fail_request(struct ploop_request * preq, int err);
>> -void ploop_preq_drop(struct ploop_device * plo, struct list_head 
>> *drop_list,
>> -              int keep_locked);
>> +void ploop_preq_drop(struct ploop_device * plo, struct list_head 
>> *drop_list);
>>
>>
>>  static inline int ploop_req_delay_fua_possible(struct ploop_request 
>> *preq)
>>