[RHEL7,v14,11/12] ve/cgroup: set release_agent_path for root cgroups separately for each ve.

Submitted by Valeriy Vdovin on April 21, 2020, 12:53 p.m.

Details

Message ID 1587473629-182085-12-git-send-email-valeriy.vdovin@virtuozzo.com
State New
Series "Make release_agent per-cgroup property. Run release_agent in proper ve."
Headers show

Commit Message

Valeriy Vdovin April 21, 2020, 12:53 p.m.
This is done so that each container can set its own release agent.
Release agent information is now stored in per-cgroup-root data
structure in ve.

https://jira.sw.ru/browse/PSBM-83887

Signed-off-by: Valeriy Vdovin <valeriy.vdovin@virtuozzo.com>
---
 include/linux/cgroup.h |   5 +-
 include/linux/ve.h     |   7 +++
 kernel/cgroup.c        | 133 ++++++++++++++++++++++++++++++++++++++++---------
 kernel/ve/ve.c         |  69 ++++++++++++++++++++++++-
 4 files changed, 186 insertions(+), 28 deletions(-)

Patch hide | download patch | download mbox

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index d0ce3cc..911dd48 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -394,9 +394,6 @@  struct cgroupfs_root {
 	/* IDs for cgroups in this hierarchy */
 	struct ida cgroup_ida;
 
-	/* The path to use for release notifications. */
-	char release_agent_path[PATH_MAX];
-
 	/* The name for this hierarchy - may be empty */
 	char name[MAX_CGROUP_ROOT_NAMELEN];
 };
@@ -639,7 +636,7 @@  int cgroup_task_count(const struct cgroup *cgrp);
 void cgroup_release_agent(struct work_struct *work);
 
 #ifdef CONFIG_VE
-void cgroup_mark_ve_roots(struct ve_struct *ve);
+int cgroup_mark_ve_roots(struct ve_struct *ve);
 void cgroup_unmark_ve_roots(struct ve_struct *ve);
 struct ve_struct *cgroup_get_ve_owner(struct cgroup *cgrp);
 #endif
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 94a07df..da719b4 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -214,6 +214,13 @@  void do_update_load_avg_ve(void);
 
 void ve_add_to_release_list(struct cgroup *cgrp);
 void ve_rm_from_release_list(struct cgroup *cgrp);
+
+int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup *cgroot,
+	const char *release_agent);
+
+const char *ve_get_release_agent_path(struct ve_struct *ve,
+	struct cgroup *cgrp_root);
+
 extern struct ve_struct *get_ve(struct ve_struct *ve);
 extern void put_ve(struct ve_struct *ve);
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index ae417ce..006319f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1090,10 +1090,21 @@  static int rebind_subsystems(struct cgroupfs_root *root,
 
 static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
 {
+	const char *release_agent;
 	struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
 	struct cgroup_subsys *ss;
+	struct cgroup *root_cgrp = &root->top_cgroup;
 
+#ifdef CONFIG_VE
+	struct ve_struct *ve = get_exec_env();
+	if (!ve_is_super(ve)) {
+		mutex_lock(&cgroup_mutex);
+		root_cgrp = task_cgroup_from_root(ve->init_task, root);
+		mutex_unlock(&cgroup_mutex);
+	}
+#endif
 	mutex_lock(&cgroup_root_mutex);
+
 	for_each_subsys(root, ss)
 		seq_printf(seq, ",%s", ss->name);
 	if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
@@ -1104,9 +1115,12 @@  static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
 		seq_puts(seq, ",xattr");
 	if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
 		seq_puts(seq, ",cpuset_v2_mode");
-	if (strlen(root->release_agent_path))
-		seq_show_option(seq, "release_agent",
-				root->release_agent_path);
+	rcu_read_lock();
+	release_agent = ve_get_release_agent_path(root_cgrp->ve_owner,
+		root_cgrp);
+	if (release_agent && release_agent[0])
+		seq_show_option(seq, "release_agent", release_agent);
+	rcu_read_unlock();
 	if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
 		seq_puts(seq, ",clone_children");
 	if (strlen(root->name))
@@ -1384,8 +1398,13 @@  static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 	/* re-populate subsystem files */
 	cgroup_populate_dir(cgrp, false, added_mask);
 
-	if (opts.release_agent)
-		strcpy(root->release_agent_path, opts.release_agent);
+	if (opts.release_agent) {
+		struct cgroup *root_cgrp;
+		root_cgrp = cgroup_get_local_root(cgrp);
+		if (root_cgrp->ve_owner)
+			ret = ve_set_release_agent_path(root_cgrp->ve_owner,
+				root_cgrp, opts.release_agent);
+	}
  out_unlock:
 	kfree(opts.release_agent);
 	kfree(opts.name);
@@ -1546,8 +1565,6 @@  static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
 	root->subsys_mask = opts->subsys_mask;
 	root->flags = opts->flags;
 	ida_init(&root->cgroup_ida);
-	if (opts->release_agent)
-		strcpy(root->release_agent_path, opts->release_agent);
 	if (opts->name)
 		strcpy(root->name, opts->name);
 	if (opts->cpuset_clone_children)
@@ -2313,27 +2330,44 @@  static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
 static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
 				      const char *buffer)
 {
-	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
-
+	int ret = 0;
+	struct cgroup *root_cgrp;
 	if (strlen(buffer) >= PATH_MAX)
 		return -EINVAL;
 
 	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
 
-	mutex_lock(&cgroup_root_mutex);
-	strcpy(cgrp->root->release_agent_path, buffer);
-	mutex_unlock(&cgroup_root_mutex);
+	root_cgrp = cgroup_get_local_root(cgrp);
+	BUG_ON(!root_cgrp);
+	if (root_cgrp->ve_owner) {
+		ret = ve_set_release_agent_path(root_cgrp->ve_owner,
+			root_cgrp, buffer);
+	}
+
 	mutex_unlock(&cgroup_mutex);
-	return 0;
+	return ret;
 }
 
 static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
 				     struct seq_file *seq)
 {
+	const char *release_agent;
+	struct cgroup *root_cgrp;
+
 	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
-	seq_puts(seq, cgrp->root->release_agent_path);
+
+	root_cgrp = cgroup_get_local_root(cgrp);
+	if (root_cgrp->ve_owner) {
+		rcu_read_lock();
+		release_agent = ve_get_release_agent_path(
+			root_cgrp->ve_owner, root_cgrp);
+
+		if (release_agent)
+			seq_puts(seq, release_agent);
+		rcu_read_unlock();
+	}
 	seq_putc(seq, '\n');
 	mutex_unlock(&cgroup_mutex);
 	return 0;
@@ -4149,7 +4183,7 @@  static struct cftype files[] = {
 	},
 	{
 		.name = "release_agent",
-		.flags = CFTYPE_ONLY_ON_ROOT,
+		.flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_VE_WRITABLE,
 		.read_seq_string = cgroup_release_agent_show,
 		.write_string = cgroup_release_agent_write,
 		.max_write_len = PATH_MAX,
@@ -4278,22 +4312,60 @@  static int subgroups_count(struct cgroup *cgroup)
 	return cgrps_count;
 }
 
+static struct cftype *get_cftype_by_name(const char *name)
+{
+	struct cftype *cft;
+	for (cft = files; cft->name[0] != '\0'; cft++) {
+		if (!strcmp(cft->name, name))
+			return cft;
+	}
+	return NULL;
+}
+
 #ifdef CONFIG_VE
-void cgroup_mark_ve_roots(struct ve_struct *ve)
+int cgroup_mark_ve_roots(struct ve_struct *ve)
 {
-	struct cgroup *cgrp;
+	struct cgroup *cgrp, *tmp;
 	struct cgroupfs_root *root;
+	int err = 0;
+	struct cftype *cft;
+	LIST_HEAD(pending);
+
+	cft = get_cftype_by_name("release_agent");
+	BUG_ON(!cft);
 
+	mutex_lock(&cgroup_cft_mutex);
 	mutex_lock(&cgroup_mutex);
 	for_each_active_root(root) {
 		cgrp = task_cgroup_from_root(ve->init_task, root);
-		cgrp->ve_owner = ve;
+		rcu_assign_pointer(cgrp->ve_owner, ve);
 		set_bit(CGRP_VE_ROOT, &cgrp->flags);
-
+		dget(cgrp->dentry);
+		list_add_tail(&cgrp->cft_q_node, &pending);
 		if (test_bit(cpu_cgroup_subsys_id, &root->subsys_mask))
 			link_ve_root_cpu_cgroup(cgrp);
 	}
 	mutex_unlock(&cgroup_mutex);
+	list_for_each_entry_safe(cgrp, tmp, &pending, cft_q_node) {
+		struct inode *inode = cgrp->dentry->d_inode;
+
+		if (err) {
+			dput(cgrp->dentry);
+			continue;
+		}
+
+		mutex_lock(&inode->i_mutex);
+		mutex_lock(&cgroup_mutex);
+		if (!cgroup_is_removed(cgrp))
+			err = cgroup_add_file(cgrp, NULL, cft);
+		mutex_unlock(&cgroup_mutex);
+		mutex_unlock(&inode->i_mutex);
+
+		list_del_init(&cgrp->cft_q_node);
+		dput(cgrp->dentry);
+	}
+	mutex_unlock(&cgroup_cft_mutex);
+	return err;
 }
 
 void cgroup_unmark_ve_roots(struct ve_struct *ve)
@@ -5429,15 +5501,24 @@  static void check_for_release(struct cgroup *cgrp)
 void cgroup_release_agent(struct work_struct *work)
 {
 	struct ve_struct *ve;
+	char *agentbuf;
+
+	agentbuf = kzalloc(PATH_MAX, GFP_KERNEL);
+	if (!agentbuf) {
+		pr_warn("failed to allocate agentbuf\n");
+		return;
+	}
+
 	ve = container_of(work, struct ve_struct, release_agent_work);
 	mutex_lock(&cgroup_mutex);
 	raw_spin_lock(&ve->release_list_lock);
 	while (!list_empty(&ve->release_list)) {
 		char *argv[3], *envp[3];
 		int i, err;
-		char *pathbuf = NULL, *agentbuf = NULL;
+		char *pathbuf = NULL;
 		struct cgroup *cgrp, *root_cgrp;
 		struct task_struct *ve_task;
+		const char *release_agent;
 
 		cgrp = list_entry(ve->release_list.next,
 				  struct cgroup,
@@ -5465,9 +5546,15 @@  void cgroup_release_agent(struct work_struct *work)
 			rcu_read_unlock();
 			goto continue_free;
 		}
+
+		release_agent = ve_get_release_agent_path(ve, root_cgrp);
+
+		*agentbuf = 0;
+		if (release_agent)
+			strcpy(agentbuf, release_agent);
 		rcu_read_unlock();
-		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
-		if (!agentbuf)
+
+		if (!*agentbuf)
 			goto continue_free;
 
 		i = 0;
@@ -5498,11 +5585,11 @@  void cgroup_release_agent(struct work_struct *work)
 		mutex_lock(&cgroup_mutex);
  continue_free:
 		kfree(pathbuf);
-		kfree(agentbuf);
 		raw_spin_lock(&ve->release_list_lock);
 	}
 	raw_spin_unlock(&ve->release_list_lock);
 	mutex_unlock(&cgroup_mutex);
+	kfree(agentbuf);
 }
 
 static int __init cgroup_disable(char *str)
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index a30343f..5371c10 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -51,6 +51,11 @@  struct per_cgroot_data {
 	 * data is related to this cgroup
 	 */
 	struct cgroup *cgroot;
+	/*
+	 * path to release agent binary that should
+	 * be spawned for all cgroups under this cgroup root
+	 */
+	struct cgroup_rcu_string __rcu *release_agent_path;
 };
 
 extern struct kmapset_set sysfs_ve_perms_set;
@@ -175,6 +180,59 @@  static inline struct per_cgroot_data *per_cgroot_get_or_create(
 	return data;
 }
 
+int ve_set_release_agent_path(struct ve_struct *ve,
+	struct cgroup *cgroot, const char *release_agent)
+{
+	struct per_cgroot_data *data;
+	struct cgroup_rcu_string *new_path, *old_path;
+	int err = 0;
+
+	new_path = cgroup_rcu_strdup(release_agent, strlen(release_agent));
+	if (IS_ERR(new_path))
+		return PTR_ERR(new_path);
+
+	data = per_cgroot_get_or_create(ve, cgroot);
+	if (IS_ERR(data)) {
+		kfree(new_path);
+		return PTR_ERR(data);
+	}
+
+	raw_spin_lock(&ve->per_cgroot_list_lock);
+
+	old_path = rcu_dereference_protected(data->release_agent_path,
+		lockdep_is_held(&ve->per_cgroot_list_lock));
+
+	rcu_assign_pointer(data->release_agent_path, new_path);
+	raw_spin_unlock(&ve->per_cgroot_list_lock);
+
+	if (old_path)
+		kfree_rcu(old_path, rcu_head);
+
+	return err;
+}
+
+const char *ve_get_release_agent_path(struct ve_struct *ve,
+	struct cgroup *cgroot)
+{
+	/* caller must grab rcu_read_lock */
+	const char *result = NULL;
+	struct per_cgroot_data *data;
+	struct cgroup_rcu_string *str;
+
+	raw_spin_lock(&ve->per_cgroot_list_lock);
+
+	data = per_cgroot_data_find_locked(&ve->per_cgroot_list, cgroot);
+	raw_spin_unlock(&ve->per_cgroot_list_lock);
+
+	if (!data)
+		return NULL;
+
+	str = rcu_dereference(data->release_agent_path);
+	if (str)
+		result = str->val;
+	return result;
+}
+
 struct cgroup_subsys_state *ve_get_init_css(struct ve_struct *ve, int subsys_id)
 {
 	struct cgroup_subsys_state *css, *tmp;
@@ -646,7 +704,9 @@  static int ve_start_container(struct ve_struct *ve)
 	if (err < 0)
 		goto err_iterate;
 
-	cgroup_mark_ve_roots(ve);
+	err = cgroup_mark_ve_roots(ve);
+	if (err)
+		goto err_mark_ve;
 
 	ve->is_running = 1;
 
@@ -656,6 +716,8 @@  static int ve_start_container(struct ve_struct *ve)
 
 	return 0;
 
+err_mark_ve:
+	ve_hook_iterate_fini(VE_SS_CHAIN, ve);
 err_iterate:
 	ve_workqueue_stop(ve);
 err_workqueue:
@@ -672,7 +734,12 @@  err_list:
 static void per_cgroot_free_all_locked(struct list_head *per_cgroot_list)
 {
 	struct per_cgroot_data *data, *saved;
+	struct cgroup_rcu_string *release_agent;
 	list_for_each_entry_safe(data, saved, per_cgroot_list, list) {
+		release_agent = data->release_agent_path;
+		RCU_INIT_POINTER(data->release_agent_path, NULL);
+		if (release_agent)
+			kfree_rcu(release_agent, rcu_head);
 		list_del_init(&data->list);
 		kfree(data);
 	}

Comments

Kirill Tkhai April 21, 2020, 3:31 p.m.
On 21.04.2020 15:53, Valeriy Vdovin wrote:
> This is done so that each container can set its own release agent.
> Release agent information is now stored in per-cgroup-root data
> structure in ve.
> 
> https://jira.sw.ru/browse/PSBM-83887
> 
> Signed-off-by: Valeriy Vdovin <valeriy.vdovin@virtuozzo.com>
> ---
>  include/linux/cgroup.h |   5 +-
>  include/linux/ve.h     |   7 +++
>  kernel/cgroup.c        | 133 ++++++++++++++++++++++++++++++++++++++++---------
>  kernel/ve/ve.c         |  69 ++++++++++++++++++++++++-
>  4 files changed, 186 insertions(+), 28 deletions(-)
> 
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index d0ce3cc..911dd48 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -394,9 +394,6 @@ struct cgroupfs_root {
>  	/* IDs for cgroups in this hierarchy */
>  	struct ida cgroup_ida;
>  
> -	/* The path to use for release notifications. */
> -	char release_agent_path[PATH_MAX];
> -
>  	/* The name for this hierarchy - may be empty */
>  	char name[MAX_CGROUP_ROOT_NAMELEN];
>  };
> @@ -639,7 +636,7 @@ int cgroup_task_count(const struct cgroup *cgrp);
>  void cgroup_release_agent(struct work_struct *work);
>  
>  #ifdef CONFIG_VE
> -void cgroup_mark_ve_roots(struct ve_struct *ve);
> +int cgroup_mark_ve_roots(struct ve_struct *ve);
>  void cgroup_unmark_ve_roots(struct ve_struct *ve);
>  struct ve_struct *cgroup_get_ve_owner(struct cgroup *cgrp);
>  #endif
> diff --git a/include/linux/ve.h b/include/linux/ve.h
> index 94a07df..da719b4 100644
> --- a/include/linux/ve.h
> +++ b/include/linux/ve.h
> @@ -214,6 +214,13 @@ void do_update_load_avg_ve(void);
>  
>  void ve_add_to_release_list(struct cgroup *cgrp);
>  void ve_rm_from_release_list(struct cgroup *cgrp);
> +
> +int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup *cgroot,
> +	const char *release_agent);
> +
> +const char *ve_get_release_agent_path(struct ve_struct *ve,
> +	struct cgroup *cgrp_root);
> +
>  extern struct ve_struct *get_ve(struct ve_struct *ve);
>  extern void put_ve(struct ve_struct *ve);
>  
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index ae417ce..006319f 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -1090,10 +1090,21 @@ static int rebind_subsystems(struct cgroupfs_root *root,
>  
>  static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
>  {
> +	const char *release_agent;
>  	struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
>  	struct cgroup_subsys *ss;
> +	struct cgroup *root_cgrp = &root->top_cgroup;
>  
> +#ifdef CONFIG_VE
> +	struct ve_struct *ve = get_exec_env();
> +	if (!ve_is_super(ve)) {
> +		mutex_lock(&cgroup_mutex);
> +		root_cgrp = task_cgroup_from_root(ve->init_task, root);
> +		mutex_unlock(&cgroup_mutex);
> +	}
> +#endif
>  	mutex_lock(&cgroup_root_mutex);
> +
>  	for_each_subsys(root, ss)
>  		seq_printf(seq, ",%s", ss->name);
>  	if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
> @@ -1104,9 +1115,12 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
>  		seq_puts(seq, ",xattr");
>  	if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
>  		seq_puts(seq, ",cpuset_v2_mode");
> -	if (strlen(root->release_agent_path))
> -		seq_show_option(seq, "release_agent",
> -				root->release_agent_path);
> +	rcu_read_lock();
> +	release_agent = ve_get_release_agent_path(root_cgrp->ve_owner,
> +		root_cgrp);
> +	if (release_agent && release_agent[0])
> +		seq_show_option(seq, "release_agent", release_agent);
> +	rcu_read_unlock();
>  	if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
>  		seq_puts(seq, ",clone_children");
>  	if (strlen(root->name))
> @@ -1384,8 +1398,13 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
>  	/* re-populate subsystem files */
>  	cgroup_populate_dir(cgrp, false, added_mask);
>  
> -	if (opts.release_agent)
> -		strcpy(root->release_agent_path, opts.release_agent);
> +	if (opts.release_agent) {
> +		struct cgroup *root_cgrp;
> +		root_cgrp = cgroup_get_local_root(cgrp);
> +		if (root_cgrp->ve_owner)
> +			ret = ve_set_release_agent_path(root_cgrp->ve_owner,
> +				root_cgrp, opts.release_agent);
> +	}
>   out_unlock:
>  	kfree(opts.release_agent);
>  	kfree(opts.name);
> @@ -1546,8 +1565,6 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
>  	root->subsys_mask = opts->subsys_mask;
>  	root->flags = opts->flags;
>  	ida_init(&root->cgroup_ida);
> -	if (opts->release_agent)
> -		strcpy(root->release_agent_path, opts->release_agent);
>  	if (opts->name)
>  		strcpy(root->name, opts->name);
>  	if (opts->cpuset_clone_children)
> @@ -2313,27 +2330,44 @@ static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
>  static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
>  				      const char *buffer)
>  {
> -	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
> -
> +	int ret = 0;
> +	struct cgroup *root_cgrp;
>  	if (strlen(buffer) >= PATH_MAX)
>  		return -EINVAL;
>  
>  	if (!cgroup_lock_live_group(cgrp))
>  		return -ENODEV;
>  
> -	mutex_lock(&cgroup_root_mutex);
> -	strcpy(cgrp->root->release_agent_path, buffer);
> -	mutex_unlock(&cgroup_root_mutex);
> +	root_cgrp = cgroup_get_local_root(cgrp);
> +	BUG_ON(!root_cgrp);
> +	if (root_cgrp->ve_owner) {
> +		ret = ve_set_release_agent_path(root_cgrp->ve_owner,
> +			root_cgrp, buffer);
> +	}
> +
>  	mutex_unlock(&cgroup_mutex);
> -	return 0;
> +	return ret;
>  }
>  
>  static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
>  				     struct seq_file *seq)
>  {
> +	const char *release_agent;
> +	struct cgroup *root_cgrp;
> +
>  	if (!cgroup_lock_live_group(cgrp))
>  		return -ENODEV;
> -	seq_puts(seq, cgrp->root->release_agent_path);
> +
> +	root_cgrp = cgroup_get_local_root(cgrp);
> +	if (root_cgrp->ve_owner) {
> +		rcu_read_lock();
> +		release_agent = ve_get_release_agent_path(
> +			root_cgrp->ve_owner, root_cgrp);
> +
> +		if (release_agent)
> +			seq_puts(seq, release_agent);
> +		rcu_read_unlock();
> +	}
>  	seq_putc(seq, '\n');
>  	mutex_unlock(&cgroup_mutex);
>  	return 0;
> @@ -4149,7 +4183,7 @@ static struct cftype files[] = {
>  	},
>  	{
>  		.name = "release_agent",
> -		.flags = CFTYPE_ONLY_ON_ROOT,
> +		.flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_VE_WRITABLE,
>  		.read_seq_string = cgroup_release_agent_show,
>  		.write_string = cgroup_release_agent_write,
>  		.max_write_len = PATH_MAX,
> @@ -4278,22 +4312,60 @@ static int subgroups_count(struct cgroup *cgroup)
>  	return cgrps_count;
>  }
>  
> +static struct cftype *get_cftype_by_name(const char *name)
> +{
> +	struct cftype *cft;
> +	for (cft = files; cft->name[0] != '\0'; cft++) {
> +		if (!strcmp(cft->name, name))
> +			return cft;
> +	}
> +	return NULL;
> +}
> +
>  #ifdef CONFIG_VE
> -void cgroup_mark_ve_roots(struct ve_struct *ve)
> +int cgroup_mark_ve_roots(struct ve_struct *ve)
>  {
> -	struct cgroup *cgrp;
> +	struct cgroup *cgrp, *tmp;
>  	struct cgroupfs_root *root;
> +	int err = 0;
> +	struct cftype *cft;
> +	LIST_HEAD(pending);
> +
> +	cft = get_cftype_by_name("release_agent");
> +	BUG_ON(!cft);
>  
> +	mutex_lock(&cgroup_cft_mutex);
>  	mutex_lock(&cgroup_mutex);
>  	for_each_active_root(root) {
>  		cgrp = task_cgroup_from_root(ve->init_task, root);
> -		cgrp->ve_owner = ve;
> +		rcu_assign_pointer(cgrp->ve_owner, ve);
>  		set_bit(CGRP_VE_ROOT, &cgrp->flags);
> -
> +		dget(cgrp->dentry);
> +		list_add_tail(&cgrp->cft_q_node, &pending);
>  		if (test_bit(cpu_cgroup_subsys_id, &root->subsys_mask))
>  			link_ve_root_cpu_cgroup(cgrp);
>  	}
>  	mutex_unlock(&cgroup_mutex);
> +	list_for_each_entry_safe(cgrp, tmp, &pending, cft_q_node) {
> +		struct inode *inode = cgrp->dentry->d_inode;
> +
> +		if (err) {
> +			dput(cgrp->dentry);
> +			continue;
> +		}
> +
> +		mutex_lock(&inode->i_mutex);
> +		mutex_lock(&cgroup_mutex);
> +		if (!cgroup_is_removed(cgrp))
> +			err = cgroup_add_file(cgrp, NULL, cft);
> +		mutex_unlock(&cgroup_mutex);
> +		mutex_unlock(&inode->i_mutex);
> +
> +		list_del_init(&cgrp->cft_q_node);
> +		dput(cgrp->dentry);
> +	}
> +	mutex_unlock(&cgroup_cft_mutex);

Can we move a logic, which adds/removes a new file, into separate patch?
In case of removing is not needed, commit message should explain, why.

> +	return err;
>  }
>  
>  void cgroup_unmark_ve_roots(struct ve_struct *ve)
> @@ -5429,15 +5501,24 @@ static void check_for_release(struct cgroup *cgrp)
>  void cgroup_release_agent(struct work_struct *work)
>  {
>  	struct ve_struct *ve;
> +	char *agentbuf;
> +
> +	agentbuf = kzalloc(PATH_MAX, GFP_KERNEL);
> +	if (!agentbuf) {
> +		pr_warn("failed to allocate agentbuf\n");
> +		return;
> +	}
> +
>  	ve = container_of(work, struct ve_struct, release_agent_work);
>  	mutex_lock(&cgroup_mutex);
>  	raw_spin_lock(&ve->release_list_lock);
>  	while (!list_empty(&ve->release_list)) {
>  		char *argv[3], *envp[3];
>  		int i, err;
> -		char *pathbuf = NULL, *agentbuf = NULL;
> +		char *pathbuf = NULL;
>  		struct cgroup *cgrp, *root_cgrp;
>  		struct task_struct *ve_task;
> +		const char *release_agent;
>  
>  		cgrp = list_entry(ve->release_list.next,
>  				  struct cgroup,
> @@ -5465,9 +5546,15 @@ void cgroup_release_agent(struct work_struct *work)
>  			rcu_read_unlock();
>  			goto continue_free;
>  		}
> +
> +		release_agent = ve_get_release_agent_path(ve, root_cgrp);
> +
> +		*agentbuf = 0;
> +		if (release_agent)
> +			strcpy(agentbuf, release_agent);
>  		rcu_read_unlock();
> -		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
> -		if (!agentbuf)
> +
> +		if (!*agentbuf)
>  			goto continue_free;
>  
>  		i = 0;
> @@ -5498,11 +5585,11 @@ void cgroup_release_agent(struct work_struct *work)
>  		mutex_lock(&cgroup_mutex);
>   continue_free:
>  		kfree(pathbuf);
> -		kfree(agentbuf);
>  		raw_spin_lock(&ve->release_list_lock);
>  	}
>  	raw_spin_unlock(&ve->release_list_lock);
>  	mutex_unlock(&cgroup_mutex);
> +	kfree(agentbuf);
>  }
>  
>  static int __init cgroup_disable(char *str)
> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> index a30343f..5371c10 100644
> --- a/kernel/ve/ve.c
> +++ b/kernel/ve/ve.c
> @@ -51,6 +51,11 @@ struct per_cgroot_data {
>  	 * data is related to this cgroup
>  	 */
>  	struct cgroup *cgroot;
> +	/*
> +	 * path to release agent binary that should
> +	 * be spawned for all cgroups under this cgroup root
> +	 */
> +	struct cgroup_rcu_string __rcu *release_agent_path;
>  };
>  
>  extern struct kmapset_set sysfs_ve_perms_set;
> @@ -175,6 +180,59 @@ static inline struct per_cgroot_data *per_cgroot_get_or_create(
>  	return data;
>  }
>  
> +int ve_set_release_agent_path(struct ve_struct *ve,
> +	struct cgroup *cgroot, const char *release_agent)
> +{
> +	struct per_cgroot_data *data;
> +	struct cgroup_rcu_string *new_path, *old_path;
> +	int err = 0;
> +
> +	new_path = cgroup_rcu_strdup(release_agent, strlen(release_agent));
> +	if (IS_ERR(new_path))
> +		return PTR_ERR(new_path);
> +
> +	data = per_cgroot_get_or_create(ve, cgroot);
> +	if (IS_ERR(data)) {
> +		kfree(new_path);
> +		return PTR_ERR(data);
> +	}
> +
> +	raw_spin_lock(&ve->per_cgroot_list_lock);
> +
> +	old_path = rcu_dereference_protected(data->release_agent_path,
> +		lockdep_is_held(&ve->per_cgroot_list_lock));
> +
> +	rcu_assign_pointer(data->release_agent_path, new_path);
> +	raw_spin_unlock(&ve->per_cgroot_list_lock);
> +
> +	if (old_path)
> +		kfree_rcu(old_path, rcu_head);
> +
> +	return err;
> +}
> +
> +const char *ve_get_release_agent_path(struct ve_struct *ve,
> +	struct cgroup *cgroot)
> +{
> +	/* caller must grab rcu_read_lock */
> +	const char *result = NULL;
> +	struct per_cgroot_data *data;
> +	struct cgroup_rcu_string *str;
> +
> +	raw_spin_lock(&ve->per_cgroot_list_lock);
> +
> +	data = per_cgroot_data_find_locked(&ve->per_cgroot_list, cgroot);
> +	raw_spin_unlock(&ve->per_cgroot_list_lock);
> +
> +	if (!data)
> +		return NULL;
> +
> +	str = rcu_dereference(data->release_agent_path);
> +	if (str)
> +		result = str->val;
> +	return result;
> +}
> +
>  struct cgroup_subsys_state *ve_get_init_css(struct ve_struct *ve, int subsys_id)
>  {
>  	struct cgroup_subsys_state *css, *tmp;
> @@ -646,7 +704,9 @@ static int ve_start_container(struct ve_struct *ve)
>  	if (err < 0)
>  		goto err_iterate;
>  
> -	cgroup_mark_ve_roots(ve);
> +	err = cgroup_mark_ve_roots(ve);
> +	if (err)
> +		goto err_mark_ve;
>  
>  	ve->is_running = 1;
>  
> @@ -656,6 +716,8 @@ static int ve_start_container(struct ve_struct *ve)
>  
>  	return 0;
>  
> +err_mark_ve:
> +	ve_hook_iterate_fini(VE_SS_CHAIN, ve);
>  err_iterate:
>  	ve_workqueue_stop(ve);
>  err_workqueue:
> @@ -672,7 +734,12 @@ err_list:
>  static void per_cgroot_free_all_locked(struct list_head *per_cgroot_list)
>  {
>  	struct per_cgroot_data *data, *saved;
> +	struct cgroup_rcu_string *release_agent;
>  	list_for_each_entry_safe(data, saved, per_cgroot_list, list) {
> +		release_agent = data->release_agent_path;
> +		RCU_INIT_POINTER(data->release_agent_path, NULL);
> +		if (release_agent)
> +			kfree_rcu(release_agent, rcu_head);
>  		list_del_init(&data->list);
>  		kfree(data);
>  	}
>