[RHEL8,COMMIT] cgroup/ve: Pass cgroup_root to ve_set(get)_release_agent

Submitted by Konstantin Khorenko on March 3, 2021, 5:21 p.m.

Details

Message ID 202103031721.123HLGJk295348@finist-co8.sw.ru
State New
Series "Port release_agent virtualization from vz7"
Headers show

Commit Message

Konstantin Khorenko March 3, 2021, 5:21 p.m.
The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.5.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.vz8.5.5
------>
commit f30045d16c1c588a435adb1a0ba138ef0d1895cf
Author: Valeriy Vdovin <valeriy.vdovin@virtuozzo.com>
Date:   Wed Mar 3 20:21:16 2021 +0300

    cgroup/ve: Pass cgroup_root to ve_set(get)_release_agent
    
    Due to virtualization of release_agent cgroup property, cgroup1_show_options
    has become more complex.
    struct cgroup_root is one of the arguments to that function, it was previously
    holding the value of release_agent. But now this property is per-ve AND
    per-cgroup.  That's why to find the right release_agent value, the code should
    convert cgroup_root into one specific cgroup that is a 'virtual cgroup root' of
    a container, represented by the current VE. Getting ve is trivial but cgroup
    can be found by a helper function that will iterate css_set links under
    cgroup_mutex lock. There is a lock inversion problem when using cgroup_mutex
    in cgroup1_show_options, lockdep shows cgroup_mutex conflicts with
    kernfs_node->dep_map.
    This can be solved easily by converting per-cgroup data structure in VE into
    per-cgroup-root. This way we can provide ve_set(get)release_agent_path directly
    with struct cgroup_root agrument. For each cgroup hierarchy there is only one
    root and for each VE there can only be one virtual root either, that's why
    it is safe to just use cgroup_root as a key to find the proper release_agent
    path in each VE.
    
    Signed-off-by: Valeriy Vdovin <valeriy.vdovin@virtuozzo.com>
    Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>
    
    =====================
    Patchset description:
    
    ve/cgroup: Port release_agent virtualization from vz7
    
    This patchset ports virtualization of cgroup release_agent
    virtualization from vz7.
    
    Major challanges of porting are differences between vz7 and vz8 cgroup
    implementations:
    - transition of cgroups to kernfs
    - slightly changed locking scheme, which relies on css_set_lock in
      places, previously relied on cgroup_mutex.
    
    There is a small number of patches that have been ported without
    modifications, but most of the patches had suffered a lot of
    modification due to the factors described above.
    
    v1:
      - original patchset
    v2:
      - removed port of CGRP_REMOVED due to the use of CSS_ONLINE in VZ8 for
        same reason
      - changed ve_set(get)_release_agent_path signature for more optimal
      - added ve->is_running check before calling userspace executable
    v3:
      - use goto after check for ve->is_running in last patch
---
 include/linux/ve.h        |  8 +++++---
 kernel/cgroup/cgroup-v1.c | 44 ++++++++++++--------------------------------
 kernel/cgroup/cgroup.c    |  5 +++--
 kernel/ve/ve.c            | 19 +++++++------------
 4 files changed, 27 insertions(+), 49 deletions(-)

Patch hide | download patch | download mbox

diff --git a/include/linux/ve.h b/include/linux/ve.h
index 7cef4b39847e..3b487f8a4a50 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -145,12 +145,14 @@  extern int nr_ve;
 void ve_add_to_release_list(struct cgroup *cgrp);
 void ve_rm_from_release_list(struct cgroup *cgrp);
 
-int ve_set_release_agent_path(struct cgroup *cgroot,
+int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup_root *cgroot,
 	const char *release_agent);
 
-const char *ve_get_release_agent_path(struct cgroup *cgrp_root);
+const char *ve_get_release_agent_path(struct ve_struct *ve,
+	struct cgroup_root *cgroot);
 
-void ve_cleanup_per_cgroot_data(struct ve_struct *ve, struct cgroup *cgrp);
+void ve_cleanup_per_cgroot_data(struct ve_struct *ve,
+	struct cgroup_root *cgrp);
 
 extern struct ve_struct *get_ve(struct ve_struct *ve);
 extern void put_ve(struct ve_struct *ve);
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 46be2f688503..993ac38b895f 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -577,7 +577,8 @@  static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
 	}
 
 	if (root_cgrp->ve_owner)
-		ret = ve_set_release_agent_path(root_cgrp, strstrip(buf));
+		ret = ve_set_release_agent_path(root_cgrp->ve_owner,
+			root_cgrp->root, strstrip(buf));
 	else
 		ret = -ENODEV;
 
@@ -598,7 +599,9 @@  static int cgroup_release_agent_show(struct seq_file *seq, void *v)
 	root_cgrp = cgroup_get_local_root(cgrp);
 	if (root_cgrp->ve_owner) {
 		rcu_read_lock();
-		release_agent = ve_get_release_agent_path(root_cgrp);
+		release_agent = ve_get_release_agent_path(
+			rcu_dereference(root_cgrp->ve_owner),
+			root_cgrp->root);
 
 		if (release_agent)
 			seq_puts(seq, release_agent);
@@ -910,7 +913,7 @@  void cgroup1_release_agent(struct work_struct *work)
 			goto continue_free;
 		}
 
-		release_agent = ve_get_release_agent_path(root_cgrp);
+		release_agent = ve_get_release_agent_path(ve, root_cgrp->root);
 
 		*agentbuf = 0;
 		if (release_agent)
@@ -931,7 +934,9 @@  void cgroup1_release_agent(struct work_struct *work)
 		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
 		envp[i] = NULL;
 
+
 		mutex_unlock(&cgroup_mutex);
+
 		err = call_usermodehelper_ve(ve, argv[0], argv,
 			envp, UMH_WAIT_EXEC);
 
@@ -939,6 +944,7 @@  void cgroup1_release_agent(struct work_struct *work)
 			pr_warn_ratelimited("cgroup1_release_agent "
 					    "%s %s failed: %d\n",
 					    agentbuf, pathbuf, err);
+		up_write(&ve->op_sem);
 		mutex_lock(&cgroup_mutex);
 continue_free:
 		kfree(pathbuf);
@@ -989,7 +995,6 @@  static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo
 	const char *release_agent;
 	struct cgroup_root *root = cgroup_root_from_kf(kf_root);
 	struct cgroup_subsys *ss;
-	struct cgroup *root_cgrp = &root->cgrp;
 	int ssid;
 
 	for_each_subsys(ss, ssid)
@@ -1003,32 +1008,7 @@  static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo
 		seq_puts(seq, ",cpuset_v2_mode");
 
 	rcu_read_lock();
-#ifdef CONFIG_VE
-	{
-		struct ve_struct *ve = get_exec_env();
-		struct css_set *cset;
-		struct nsproxy *ve_ns;
-
-		if (!ve_is_super(ve)) {
-			/*
-			 * ve->init_task is NULL in case when cgroup is accessed
-			 * before ve_start_container has been called.
-			 *
-			 * ve->init_task is synchronized via ve->ve_ns rcu, see
-			 * ve_grab_context/drop_context.
-			 */
-			ve_ns = rcu_dereference(ve->ve_ns);
-			if (ve_ns) {
-				spin_lock_irq(&css_set_lock);
-				cset = ve_ns->cgroup_ns->root_cset;
-				BUG_ON(!cset);
-				root_cgrp = cset_cgroup_from_root(cset, root);
-				spin_unlock_irq(&css_set_lock);
-			}
-		}
-	}
-#endif
-	release_agent = ve_get_release_agent_path(root_cgrp);
+	release_agent = ve_get_release_agent_path(get_exec_env(), root);
 	if (release_agent && release_agent[0])
 		seq_show_option(seq, "release_agent", release_agent);
 	rcu_read_unlock();
@@ -1226,8 +1206,8 @@  static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data)
 
 		root_cgrp = cgroup_get_local_root(&root->cgrp);
 		if (root_cgrp->ve_owner)
-			ret = ve_set_release_agent_path(root_cgrp,
-				opts.release_agent);
+			ret = ve_set_release_agent_path(root_cgrp->ve_owner,
+				root_cgrp->root, opts.release_agent);
 	}
 
 	trace_cgroup_remount(root);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 75997b503d3c..09d328b76dab 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2152,7 +2152,8 @@  void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
 
 	root->flags = opts->flags;
 	if (opts->release_agent)
-		ve_set_release_agent_path(cgrp, opts->release_agent);
+		ve_set_release_agent_path(cgrp->ve_owner, root,
+			opts->release_agent);
 
 	if (opts->name)
 		strscpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN);
@@ -2360,7 +2361,7 @@  static void cgroup_kill_sb(struct super_block *sb)
 		percpu_ref_kill(&root->cgrp.self.refcnt);
 	cgroup_put(&root->cgrp);
 	kernfs_kill_sb(sb);
-	ve_cleanup_per_cgroot_data(NULL, &root->cgrp);
+	ve_cleanup_per_cgroot_data(&ve0, root);
 }
 
 struct file_system_type cgroup_fs_type = {
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index e8945e10e070..031b104075c8 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -35,7 +35,7 @@  struct per_cgroot_data {
 	/*
 	 * data is related to this cgroup
 	 */
-	struct cgroup *cgroot;
+	struct cgroup_root *cgroot;
 
 	/*
 	 * path to release agent binaray, that should
@@ -217,7 +217,7 @@  int nr_threads_ve(struct ve_struct *ve)
 EXPORT_SYMBOL(nr_threads_ve);
 
 static struct per_cgroot_data *per_cgroot_data_find_locked(
-	struct list_head *per_cgroot_list, struct cgroup *cgroot)
+	struct list_head *per_cgroot_list, struct cgroup_root *cgroot)
 {
 	struct per_cgroot_data *data;
 
@@ -229,7 +229,7 @@  static struct per_cgroot_data *per_cgroot_data_find_locked(
 }
 
 static inline struct per_cgroot_data *per_cgroot_get_or_create(
-	struct ve_struct *ve, struct cgroup *cgroot)
+	struct ve_struct *ve, struct cgroup_root *cgroot)
 {
 	struct per_cgroot_data *data, *other_data;
 	unsigned long flags;
@@ -263,10 +263,9 @@  static inline struct per_cgroot_data *per_cgroot_get_or_create(
 	return data;
 }
 
-int ve_set_release_agent_path(struct cgroup *cgroot,
+int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup_root *cgroot,
 	const char *release_agent)
 {
-	struct ve_struct *ve;
 	unsigned long flags;
 	struct per_cgroot_data *data;
 	struct cgroup_rcu_string *new_path, *old_path;
@@ -276,7 +275,6 @@  int ve_set_release_agent_path(struct cgroup *cgroot,
 	 * caller should grab cgroup_mutex to safely use
 	 * ve_owner field
 	 */
-	ve = cgroot->ve_owner;
 	BUG_ON(!ve);
 
 	nbytes = strlen(release_agent);
@@ -304,16 +302,15 @@  int ve_set_release_agent_path(struct cgroup *cgroot,
 	return 0;
 }
 
-const char *ve_get_release_agent_path(struct cgroup *cgroot)
+const char *ve_get_release_agent_path(struct ve_struct *ve,
+	struct cgroup_root *cgroot)
 {
 	/* caller must grab rcu_read_lock */
 	const char *result = NULL;
 	struct per_cgroot_data *data;
 	struct cgroup_rcu_string *str;
-	struct ve_struct *ve;
 	unsigned long flags;
 
-	ve = rcu_dereference(cgroot->ve_owner);
 	if (!ve)
 		return NULL;
 
@@ -677,15 +674,13 @@  static inline void per_cgroot_data_free(struct per_cgroot_data *data)
 	kfree(data);
 }
 
-void ve_cleanup_per_cgroot_data(struct ve_struct *ve, struct cgroup *cgrp)
+void ve_cleanup_per_cgroot_data(struct ve_struct *ve, struct cgroup_root *cgrp)
 {
 	struct per_cgroot_data *data, *saved;
 	unsigned long flags;
 
 	BUG_ON(!ve && !cgrp);
 	rcu_read_lock();
-	if (!ve)
-		ve = cgroup_get_ve_owner(cgrp);
 
 	spin_lock_irqsave(&ve->per_cgroot_list_lock, flags);
 	list_for_each_entry_safe(data, saved, &ve->per_cgroot_list, list) {