[vz7,v2,1/2] ploop: store exec_ub in ploop request and use it while processing requests

Submitted by Konstantin Khorenko on Aug. 27, 2018, 12:20 p.m.

Details

Message ID 20180827122055.873-1-khorenko@virtuozzo.com
State New
Series "Series without cover letter"
Headers show

Commit Message

Konstantin Khorenko Aug. 27, 2018, 12:20 p.m.
ploop_req_state_process() is intended to set up proper exec_ub
while processing ploop requests, but it uses preq->ioc->ioc_ub which is
almost always incorrect.

ploop requests are created in kworker which is run in host:

kworker/u16:5   319 [005]   841.012218:   probe:ploop_grab_iocontext: (ffffffffc0004542)
                    45b3 ploop_make_request (/lib/modules/3.10.0/kernel/drivers/block/ploop/ploop.ko)
                  5345bb generic_make_request ([kernel.kallsyms])
                  534843 submit_bio ([kernel.kallsyms])
                   12185 ext4_io_submit ([ext4])
                   123e9 ext4_bio_write_page ([ext4])
                    82bd mpage_submit_page ([ext4])
                    8568 mpage_map_and_submit_buffers ([ext4])
                    dff5 ext4_writepages ([ext4])
                  3b0221 do_writepages ([kernel.kallsyms])
                  460dba __writeback_single_inode ([kernel.kallsyms])
                  4613b5 writeback_sb_inodes ([kernel.kallsyms])
                  4617b2 __writeback_inodes_wb ([kernel.kallsyms])
                  461a43 wb_writeback ([kernel.kallsyms])
                  462376 bdi_writeback_workfn ([kernel.kallsyms])
                  2b11d5 process_one_work ([kernel.kallsyms])
                  2b237e worker_thread ([kernel.kallsyms])
                  2b9781 kthread ([kernel.kallsyms])
                  90ebf7 ret_from_fork ([kernel.kallsyms])

"kworker/u16:5" has exec_ub == ub0
and ploop_make_request() stores io_context from "current" == kworker in bio and
sets flag BIO_BDEV_REUSED:

queue:
        BUG_ON (bio->bi_bdev != plo->bdev && bio_sectors(bio));
        if (bio->bi_bdev == plo->bdev) {
                BUG_ON (test_bit(BIO_BDEV_REUSED, &bio->bi_flags));
                ploop_grab_iocontext(bio);
        }

Patch hide | download patch | download mbox

==========
static void ploop_grab_iocontext(struct bio *bio)
{
        struct io_context **ioc_pp = (struct io_context **)(&bio->bi_bdev);
        if (current->io_context) {
                ioc_task_link(current->io_context);
                *ioc_pp = current->io_context;
==========
Meanwhile ploop_thread() processes bios:

  ploop_thread
   process_pending_bios
    ploop_bio_queue
     get a free ploop request
     if BIO_BDEV_REUSED is set to bio, preq->ioc = saved io_context (with ub0)

ploop_bio_queue(struct ploop_device * plo, struct bio * bio,
                struct list_head *drop_list, int account_blockable)
{
        if (test_bit(BIO_BDEV_REUSED, &bio->bi_flags)) {
                    preq->ioc = (struct io_context *)(bio->bi_bdev);
                    bio->bi_bdev = plo->bdev;
                    clear_bit(BIO_BDEV_REUSED, &bio->bi_flags);

=> now preq->ioc contains exec_ub == ub0
later ploop_thread() -> ploop_req_state_process() executes
set_exec_ub(preq->ioc->ioc_ub)		// == ub0

This patch adds preq::preq_ub field to "ploop_request" and sets it up properly
on ploop request creation.

Q: Why do we have proper current->exec_ub on ploop request creation time?
A: See calltrace above - it goes through __writeback_single_inode which
   sets proper exec_ub.

__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
{
...
        ub = rcu_dereference(inode->i_mapping->dirtied_ub);
...
        ub = set_exec_ub(ub);
        ret = __do_writeback_single_inode(inode, wbc);
        put_beancounter(set_exec_ub(ub));
...

https://jira.sw.ru/browse/PSBM-86910

Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>

v2 changes:
 - get correct ub in ploop_req_state_process()
---
 drivers/block/ploop/dev.c   | 45 ++++++++++++++++++++++++++++++++++++++-------
 include/linux/ploop/ploop.h |  2 ++
 2 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index c6094144c7a5..a71f28476eca 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -22,6 +22,7 @@ 
 
 #include <trace/events/block.h>
 
+#include <bc/beancounter.h>
 #include <linux/ploop/ploop.h>
 #include "ploop_events.h"
 #include "freeblks.h"
@@ -241,6 +242,10 @@  void ploop_preq_drop(struct ploop_device * plo, struct list_head *drop_list)
 			put_io_context_active(preq->ioc);
 			preq->ioc = NULL;
 		}
+#ifdef CONFIG_BEANCOUNTERS
+		put_beancounter(preq->preq_ub);
+		preq->preq_ub = NULL;
+#endif
 
 		BUG_ON (test_bit(PLOOP_REQ_ZERO, &preq->state));
 		ploop_test_and_clear_blockable(plo, preq);
@@ -562,6 +567,14 @@  ploop_bio_queue(struct ploop_device * plo, struct bio * bio,
 		preq->ioc = NULL;
 	}
 
+#ifdef CONFIG_BEANCOUNTERS
+	/* Proper exec_ub is set by __writeback_single_inode().
+	 * preq->ioc->ioc_ub does not fit here because we may be in "kworker"
+	 * and preq->ioc is a link to current->io_context.
+	 */
+	preq->preq_ub = get_beancounter(get_exec_ub());
+#endif
+
 	if (unlikely(bio->bi_rw & REQ_SYNC))
 		__set_bit(PLOOP_REQ_SYNC, &preq->state);
 	if (unlikely(bio == plo->bio_sync)) {
@@ -1324,6 +1337,7 @@  static void ploop_complete_request(struct ploop_request * preq)
 	struct ploop_device * plo = preq->plo;
 	int nr_completed = 0;
 	struct io_context *ioc;
+	struct user_beancounter *ub;
 
 	trace_complete_request(preq);
 
@@ -1417,6 +1431,10 @@  static void ploop_complete_request(struct ploop_request * preq)
 
 	ioc = preq->ioc;
 	preq->ioc = NULL;
+#ifdef CONFIG_BEANCOUNTERS
+	ub = preq->preq_ub;
+	preq->preq_ub = NULL;
+#endif
 
 	plo->active_reqs--;
 
@@ -1451,6 +1469,9 @@  static void ploop_complete_request(struct ploop_request * preq)
 		atomic_dec(&ioc->nr_tasks);
 		put_io_context_active(ioc);
 	}
+#ifdef CONFIG_BEANCOUNTERS
+	put_beancounter(ub);
+#endif
 }
 
 void ploop_fail_request(struct ploop_request * preq, int err)
@@ -2513,7 +2534,7 @@  static void ploop_req_state_process(struct ploop_request * preq)
 	struct io_context * saved_ioc = NULL;
 	int release_ioc = 0;
 #ifdef CONFIG_BEANCOUNTERS
-	struct user_beancounter * uninitialized_var(saved_ub);
+	struct user_beancounter *saved_ub;
 #endif
 
 	trace_req_state_process(preq);
@@ -2521,12 +2542,13 @@  static void ploop_req_state_process(struct ploop_request * preq)
 	if (preq->ioc) {
 		saved_ioc = current->io_context;
 		current->io_context = preq->ioc;
-#ifdef CONFIG_BEANCOUNTERS
-		saved_ub = set_exec_ub(preq->ioc->ioc_ub);
-#endif
 		atomic_long_inc(&preq->ioc->refcount);
 		release_ioc = 1;
 	}
+#ifdef CONFIG_BEANCOUNTERS
+	get_beancounter(preq->preq_ub);
+	saved_ub = set_exec_ub(preq->preq_ub);
+#endif
 
 	if (preq->eng_state != PLOOP_E_COMPLETE &&
 	    test_bit(PLOOP_REQ_SYNC, &preq->state))
@@ -2586,6 +2608,10 @@  static void ploop_req_state_process(struct ploop_request * preq)
 			preq->eng_state = PLOOP_E_COMPLETE;
 			preq->error = -EOPNOTSUPP;
 			ploop_complete_io_state(preq);
+#ifdef CONFIG_BEANCOUNTERS
+			saved_ub = set_exec_ub(saved_ub);
+			put_beancounter(saved_ub);
+#endif
 			return;
 		}
 
@@ -2861,11 +2887,12 @@  static void ploop_req_state_process(struct ploop_request * preq)
 	if (release_ioc) {
 		struct io_context * ioc = current->io_context;
 		current->io_context = saved_ioc;
-#ifdef CONFIG_BEANCOUNTERS
-		set_exec_ub(saved_ub);
-#endif
 		put_io_context(ioc);
 	}
+#ifdef CONFIG_BEANCOUNTERS
+	saved_ub = set_exec_ub(saved_ub);
+	put_beancounter(saved_ub);
+#endif
 }
 
 static void ploop_wait(struct ploop_device * plo, int once, struct blk_plug *plug)
@@ -3372,6 +3399,7 @@  void ploop_quiesce(struct ploop_device * plo)
 	preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_BARRIER);
 	preq->error = 0;
 	preq->tstamp = jiffies;
+	preq->preq_ub = get_beancounter(get_exec_ub());
 
 	init_completion(&qcomp);
 	init_completion(&plo->relax_comp);
@@ -3669,6 +3697,7 @@  static void ploop_merge_process(struct ploop_device * plo)
 		preq->tstamp = jiffies;
 		preq->iblock = 0;
 		preq->prealloc_size = 0;
+		preq->preq_ub = get_beancounter(get_exec_ub());
 
 		atomic_inc(&plo->maintenance_cnt);
 
@@ -4292,6 +4321,7 @@  static void ploop_relocate(struct ploop_device * plo, int grow_stage)
 	preq->tstamp = jiffies;
 	preq->iblock = (reloc_type == PLOOP_REQ_RELOC_A) ? 0 : plo->grow_start;
 	preq->prealloc_size = 0;
+	preq->preq_ub = get_beancounter(get_exec_ub());
 
 	atomic_inc(&plo->maintenance_cnt);
 
@@ -4617,6 +4647,7 @@  static void ploop_relocblks_process(struct ploop_device *plo)
 		preq->tstamp = jiffies;
 		preq->iblock = 0;
 		preq->prealloc_size = 0;
+		preq->preq_ub = get_beancounter(get_exec_ub());
 
 		atomic_inc(&plo->maintenance_cnt);
 
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index fd683f79956d..fbab9fbeb9ef 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -618,6 +618,8 @@  struct ploop_request
 	/* if the engine starts operation on particular io, let's finish
 	 * the operation on the same io (see io.ops->post_submit) */
 	struct ploop_io	       *eng_io;
+
+	struct user_beancounter *preq_ub;
 };
 
 static inline struct ploop_delta * ploop_top_delta(struct ploop_device * plo)