[RHEL7,COMMIT] ve/cgroup: Added ve_owner field to cgroup

Submitted by Vasily Averin on Aug. 6, 2020, 5:31 a.m.

Details

Message ID 202008060531.0765VbIV003730@vz7build.vvs.sw.ru
State New
Series "Make release_agent per-cgroup property. Run release_agent in proper ve."
Headers show

Commit Message

Vasily Averin Aug. 6, 2020, 5:31 a.m.
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.18.2.el7
------>
commit 8895dbbaaa99f1f64cf8aed563999e0616590bec
Author: Valeriy Vdovin <valeriy.vdovin@virtuozzo.com>
Date:   Thu Aug 6 08:31:37 2020 +0300

    ve/cgroup: Added ve_owner field to cgroup
    
    Each cgroup representing a host or a container root of
    cgroup subsystem hierarchy will have this field set to
    a valid ve_struct, that owns this root. This way each
    cgroup in a system will be able to know its owning VE.
    Non root cgroups will have this field set to NULL, this
    is an optimization for cleanup code: at VE destruction
    we only need to iterate over all root cgroups to clean
    reference to former owning VE, rather than over all
    cgroup hierarchy.
    Still any cgroup that wants to know about its owning
    VE can find its virtual root cgroup and read its
    ve_owner field.
    
    cgroup->ve_owner is declared as RCU pointer, because it fits
    RCU semantics - rare writes, frequent reads. ve_owner will be
    read from multiple locations in code in further patches and
    is only rarely set at cgroup_mark_ve_roots/cgroup_mount.
    cgroup_get_ve_owner is a read wrapper for this purpose.
    
    Signed-off-by: Valeriy Vdovin <valeriy.vdovin@virtuozzo.com>
    
    Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 include/linux/cgroup.h |  4 ++++
 kernel/cgroup.c        | 45 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)

Patch hide | download patch | download mbox

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 6e2c206..9b9465c 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -327,6 +327,9 @@  struct cgroup {
 	/* directory xattrs */
 	struct simple_xattrs xattrs;
 	u64 subgroups_limit;
+
+	/* ve_owner, responsible for running release agent. */
+	struct ve_struct __rcu *ve_owner;
 };
 
 #define MAX_CGROUP_ROOT_NAMELEN 64
@@ -672,6 +675,7 @@  int cgroup_task_count(const struct cgroup *cgrp);
 #ifdef CONFIG_VE
 void cgroup_mark_ve_roots(struct ve_struct *ve);
 void cgroup_unmark_ve_roots(struct ve_struct *ve);
+struct ve_struct *cgroup_get_ve_owner(struct cgroup *cgrp);
 #endif
 
 /*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index db4be63..fe02e92 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -810,6 +810,42 @@  static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
 	return name;
 }
 
+struct cgroup *cgroup_get_local_root(struct cgroup *cgrp)
+{
+	/*
+	 * Walk up ->parent to the nearest cgroup that either has no parent
+	 * (host cgroup root) or has CGRP_VE_ROOT set (ve cgroup root).
+	 *
+	 *    <host_root_cgroup> -> local_root
+	 *     \                    ^
+	 *      <cgroup>            |
+	 *       \                  |
+	 *        <cgroup>   --->   from here
+	 *        \
+	 *         <ve_root_cgroup> -> local_root
+	 *         \                   ^
+	 *          <cgroup>           |
+	 *          \                  |
+	 *           <cgroup>  --->    from here
+	 */
+	while (cgrp->parent && !test_bit(CGRP_VE_ROOT, &cgrp->flags))
+		cgrp = cgrp->parent;
+
+	return cgrp;
+}
+
+struct ve_struct *cgroup_get_ve_owner(struct cgroup *cgrp)
+{
+	struct ve_struct *ve;
+	/* Caller must hold RCU read lock; NULL owner yields get_ve0(). */
+
+	cgrp = cgroup_get_local_root(cgrp);
+	ve = rcu_dereference(cgrp->ve_owner);
+	if (!ve)
+		ve = get_ve0();
+	return ve;
+}
+
 static void cgroup_free_fn(struct work_struct *work)
 {
 	struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
@@ -1796,6 +1832,8 @@  static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 		ret = PTR_ERR(new_root);
 		goto drop_modules;
 	}
+
+	RCU_INIT_POINTER(new_root->top_cgroup.ve_owner, &ve0);
 	opts.new_root = new_root;
 
 	/* Locate an existing or new sb for this hierarchy */
@@ -4512,12 +4550,14 @@  void cgroup_mark_ve_roots(struct ve_struct *ve)
 	mutex_lock(&cgroup_mutex);
 	for_each_active_root(root) {
 		cgrp = task_cgroup_from_root(ve->init_task, root);
+		rcu_assign_pointer(cgrp->ve_owner, ve);
 		set_bit(CGRP_VE_ROOT, &cgrp->flags);
 
 		if (test_bit(cpu_cgroup_subsys_id, &root->subsys_mask))
 			link_ve_root_cpu_cgroup(cgrp);
 	}
 	mutex_unlock(&cgroup_mutex);
+	synchronize_rcu();
 }
 
 void cgroup_unmark_ve_roots(struct ve_struct *ve)
@@ -4528,9 +4568,14 @@  void cgroup_unmark_ve_roots(struct ve_struct *ve)
 	mutex_lock(&cgroup_mutex);
 	for_each_active_root(root) {
 		cgrp = css_cgroup_from_root(ve->root_css_set, root);
+		BUG_ON(!rcu_dereference_protected(cgrp->ve_owner,
+				lockdep_is_held(&cgroup_mutex)));
+		rcu_assign_pointer(cgrp->ve_owner, NULL);
 		clear_bit(CGRP_VE_ROOT, &cgrp->flags);
 	}
 	mutex_unlock(&cgroup_mutex);
+	/* ve_owner == NULL will be visible */
+	synchronize_rcu();
 }
 
 struct cgroup *cgroup_get_ve_root(struct cgroup *cgrp)