[Devel,RHEL7,COMMIT] cgroup/ve: fix possible circular locking dependency

Submitted by Konstantin Khorenko on Feb. 9, 2017, 12:57 p.m.

Details

Message ID 201702091257.v19CvEnR018037@finist_cl7.x64_64.work.ct
State New
Series "cgroup/ve: fix possible circular locking dependency"
Headers show

Commit Message

Konstantin Khorenko Feb. 9, 2017, 12:57 p.m.
The commit is pushed to "branch-rh7-3.10.0-514.6.1.vz7.28.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-514.6.1.vz7.28.5
------>
commit a0dd95a9880700d4e5d68ec4490725c60f8b5dd0
Author: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date:   Thu Feb 9 16:57:14 2017 +0400

    cgroup/ve: fix possible circular locking dependency
    
    cgroup_add_ve_root_files() takes ->i_mutex with cgroup_mutex
    held which is unsafe because it violates lock order in the rest
    of cgroup code:
    
     ======================================================
     [ INFO: possible circular locking dependency detected ]
     3.10.0-514.6.1.vz7.28.3.debug #1 Not tainted
     -------------------------------------------------------
     vzctl/4073 is trying to acquire lock:
      (&sb->s_type->i_mutex_key#7){+.+.+.}, at: [<ffffffff8137f5b9>] cgroup_mark_ve_root+0x129/0x2c0
    
    is already holding lock:
      (cgroup_mutex){+.+.+.}, at: [<ffffffff8137f4c1>] cgroup_mark_ve_root+0x31/0x2c0
    which lock already depends on the new lock.
    the existing dependency chain (in reverse order) is:
    
    -> #1 (cgroup_mutex){+.+.+.}:
            [<ffffffff813280eb>] __lock_acquire+0x75b/0x12c0
            [<ffffffff8132971a>] lock_acquire+0x10a/0x400
            [<ffffffff824ef6de>] mutex_lock_nested+0x12e/0xbc0
            [<ffffffff8137c527>] cgroup_mount+0x897/0xe90
            [<ffffffff81654dde>] mount_fs+0x3e/0x240
            [<ffffffff816b187b>] vfs_kern_mount+0x6b/0x260
            [<ffffffff816b1aab>] kern_mount_data+0x3b/0xa0
            [<ffffffff813809c8>] cgroup_kernel_mount+0x18/0x20
            [<ffffffff83194822>] ub_init_cgroup+0xd7/0x1a4
            [<ffffffff81002150>] do_one_initcall+0x120/0x350
            [<ffffffff8313da86>] kernel_init_freeable+0x4cd/0x59a
            [<ffffffff824c96de>] kernel_init+0xe/0xf0
            [<ffffffff82516158>] ret_from_fork+0x58/0x90
    
    -> #0 (&sb->s_type->i_mutex_key#7){+.+.+.}:
            [<ffffffff81323d9a>] validate_chain.isra.44+0x1f7a/0x2ef0
            [<ffffffff813280eb>] __lock_acquire+0x75b/0x12c0
            [<ffffffff8132971a>] lock_acquire+0x10a/0x400
            [<ffffffff824ef6de>] mutex_lock_nested+0x12e/0xbc0
            [<ffffffff8137f5b9>] cgroup_mark_ve_root+0x129/0x2c0
            [<ffffffff81310648>] ve_start_container+0x978/0xac0
            [<ffffffff81311c80>] ve_state_write+0x320/0x8e0
            [<ffffffff81372a0d>] cgroup_file_write+0x5fd/0xb90
            [<ffffffff816498f2>] vfs_write+0x1e2/0x650
            [<ffffffff8164c1e8>] SyS_write+0x178/0x270
            [<ffffffff82516209>] system_call_fastpath+0x16/0x1
    
      Possible unsafe locking scenario:
            CPU0                    CPU1
            ----                    ----
       lock(cgroup_mutex);
                                    lock(&sb->s_type->i_mutex_key#7);
                                    lock(cgroup_mutex);
       lock(&sb->s_type->i_mutex_key#7);
    
    Instead of adding 'cgroup.subgroups_limit' files only on VE_ROOT cgroups let's
    add them on every cgroup, but allow writing to it only in VE_ROOT cgroup.
    This is much more simpler and allow us to kill a lot of bogus code.
    
    https://jira.sw.ru/browse/PSBM-59122
    
    Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
    Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
---
 include/linux/cgroup.h |  1 -
 kernel/cgroup.c        | 34 ++++------------------------------
 2 files changed, 4 insertions(+), 31 deletions(-)

Patch hide | download patch | download mbox

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index d99251b..ea02186 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -446,7 +446,6 @@  struct cgroup_map_cb {
 #define CFTYPE_NOT_ON_ROOT	(1U << 1)	/* don't create on root cg */
 #define CFTYPE_INSANE		(1U << 2)	/* don't create if sane_behavior */
 #define CFTYPE_VE_WRITABLE	(1U << 15)	/* allow write from CT */
-#define CFTYPE_ONLY_ON_VE_ROOT	(1U << 16)	/* only create on CT's root cg */
 
 #define MAX_CFTYPE_NAME		64
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e8ec5f2..ae9ef0d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2807,9 +2807,6 @@  static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
 			continue;
 		if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
 			continue;
-		if ((cft->flags & CFTYPE_ONLY_ON_VE_ROOT) &&
-				!test_bit(CGRP_VE_ROOT, &cgrp->flags))
-			continue;
 
 		if (is_add) {
 			err = cgroup_add_file(cgrp, subsys, cft);
@@ -4049,6 +4046,9 @@  static int cgroup_write_subgroups_limit(struct cgroup *cgrp,
 					struct cftype *cft,
 					u64 val)
 {
+	if (!test_bit(CGRP_VE_ROOT, &cgrp->flags))
+		return -EACCES;
+
 	cgrp->subgroups_limit = val;
 	return 0;
 }
@@ -4105,7 +4105,6 @@  static struct cftype files[] = {
 	},
 	{
 		.name = "cgroup.subgroups_limit",
-		.flags = CFTYPE_ONLY_ON_VE_ROOT,
 		.read_u64 = cgroup_read_subgroups_limit,
 		.write_u64 = cgroup_write_subgroups_limit,
 		.mode = S_IRUGO | S_IWUSR,
@@ -4239,30 +4238,6 @@  static int subgroups_count(struct cgroup *cgroup)
 }
 
 #ifdef CONFIG_VE
-static void cgroup_add_ve_root_files(struct cgroup *cgrp,
-				struct cgroup_subsys *subsys,
-				struct cftype cfts[])
-{
-	struct cftype *cft;
-
-	if (!test_bit(CGRP_VE_ROOT, &cgrp->flags))
-		return;
-
-	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
-	for (cft = cfts; cft->name[0] != '\0'; cft++) {
-		int err;
-
-		if (!(cft->flags & CFTYPE_ONLY_ON_VE_ROOT))
-			continue;
-
-		err = cgroup_add_file(cgrp, subsys, cft);
-		if (err)
-			pr_warn("cgroup_add_ve_root_files: failed to add %s, err=%d\n",
-				cft->name, err);
-	}
-	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
-}
-
 void cgroup_mark_ve_root(struct ve_struct *ve)
 {
 	struct cgroup *cgrp;
@@ -4271,8 +4246,7 @@  void cgroup_mark_ve_root(struct ve_struct *ve)
 	mutex_lock(&cgroup_mutex);
 	for_each_active_root(root) {
 		cgrp = task_cgroup_from_root(ve->init_task, root);
-		if (!test_and_set_bit(CGRP_VE_ROOT, &cgrp->flags))
-			cgroup_add_ve_root_files(cgrp, NULL, files);
+		set_bit(CGRP_VE_ROOT, &cgrp->flags);
 	}
 	mutex_unlock(&cgroup_mutex);
 }