From patchwork Tue Jul 28 17:53:23 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [RHEL7, v21, 11/14] ve/cgroup: set release_agent_path for root cgroups separately for each ve. From: Valeriy Vdovin X-Patchwork-Id: 13218 Message-Id: <1595958806-338946-12-git-send-email-valeriy.vdovin@virtuozzo.com> To: devel@openvz.org, Kirill Tkhai , Vasily Averin , Konstantin Khorenko Date: Tue, 28 Jul 2020 20:53:23 +0300 This is done so that each container could set its own release agent. Release agent information is now stored in a per-cgroup-root data structure in ve. https://jira.sw.ru/browse/PSBM-83887 Signed-off-by: Valeriy Vdovin --- include/linux/cgroup.h | 3 -- include/linux/ve.h | 6 +++ kernel/cgroup.c | 100 ++++++++++++++++++++++++++++++++++++++++--------- kernel/ve/ve.c | 72 +++++++++++++++++++++++++++++++++++ 4 files changed, 161 insertions(+), 20 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 5f1460d..fc138c0 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -429,9 +429,6 @@ struct cgroupfs_root { /* IDs for cgroups in this hierarchy */ struct ida cgroup_ida; - /* The path to use for release notifications. 
*/ - char release_agent_path[PATH_MAX]; - /* The name for this hierarchy - may be empty */ char name[MAX_CGROUP_ROOT_NAMELEN]; }; diff --git a/include/linux/ve.h b/include/linux/ve.h index 65413d5..b6662637 100644 --- a/include/linux/ve.h +++ b/include/linux/ve.h @@ -214,6 +214,12 @@ void do_update_load_avg_ve(void); void ve_add_to_release_list(struct cgroup *cgrp); void ve_rm_from_release_list(struct cgroup *cgrp); + +int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup *cgroot, + const char *release_agent); + +const char *ve_get_release_agent_path(struct cgroup *cgrp_root); + extern struct ve_struct *get_ve(struct ve_struct *ve); extern void put_ve(struct ve_struct *ve); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index aa93cf2..1d9c889 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1092,9 +1092,12 @@ static int rebind_subsystems(struct cgroupfs_root *root, static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) { + const char *release_agent; struct cgroupfs_root *root = dentry->d_sb->s_fs_info; struct cgroup_subsys *ss; + struct cgroup *root_cgrp = &root->top_cgroup; + mutex_lock(&cgroup_mutex); mutex_lock(&cgroup_root_mutex); for_each_subsys(root, ss) seq_printf(seq, ",%s", ss->name); @@ -1106,14 +1109,37 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",xattr"); if (root->flags & CGRP_ROOT_CPUSET_V2_MODE) seq_puts(seq, ",cpuset_v2_mode"); - if (strlen(root->release_agent_path)) - seq_show_option(seq, "release_agent", - root->release_agent_path); +#ifdef CONFIG_VE + { + struct ve_struct *ve = get_exec_env(); + + if (!ve_is_super(ve)) { + /* + * ve->init_task is NULL in case when cgroup is accessed + * before ve_start_container has been called. + * + * ve->init_task is synchronized via ve->ve_ns rcu, see + * ve_grab_context/drop_context. 
+ */ + rcu_read_lock(); + if (ve->ve_ns) + root_cgrp = task_cgroup_from_root(ve->init_task, + root); + rcu_read_unlock(); + } + } +#endif + rcu_read_lock(); + release_agent = ve_get_release_agent_path(root_cgrp); + if (release_agent && release_agent[0]) + seq_show_option(seq, "release_agent", release_agent); + rcu_read_unlock(); if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags)) seq_puts(seq, ",clone_children"); if (strlen(root->name)) seq_show_option(seq, "name", root->name); mutex_unlock(&cgroup_root_mutex); + mutex_unlock(&cgroup_mutex); return 0; } @@ -1386,8 +1412,13 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) /* re-populate subsystem files */ cgroup_populate_dir(cgrp, false, added_mask); - if (opts.release_agent) - strcpy(root->release_agent_path, opts.release_agent); + if (opts.release_agent) { + struct cgroup *root_cgrp; + root_cgrp = cgroup_get_local_root(cgrp); + if (root_cgrp->ve_owner) + ret = ve_set_release_agent_path(root_cgrp, + opts.release_agent); + } out_unlock: kfree(opts.release_agent); kfree(opts.name); @@ -1549,8 +1580,6 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) root->subsys_mask = opts->subsys_mask; root->flags = opts->flags; ida_init(&root->cgroup_ida); - if (opts->release_agent) - strcpy(root->release_agent_path, opts->release_agent); if (opts->name) strcpy(root->name, opts->name); if (opts->cpuset_clone_children) @@ -1748,6 +1777,11 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, cred = override_creds(&init_cred); cgroup_populate_dir(root_cgrp, true, root->subsys_mask); + if (opts.release_agent) { + ret = ve_set_release_agent_path(root_cgrp, + opts.release_agent); + } + revert_creds(cred); mutex_unlock(&cgroup_root_mutex); mutex_unlock(&cgroup_mutex); @@ -2317,7 +2351,8 @@ static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid) static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype 
*cft, const char *buffer) { - BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); + int ret = 0; + struct cgroup *root_cgrp; if (strlen(buffer) >= PATH_MAX) return -EINVAL; @@ -2325,19 +2360,35 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, if (!cgroup_lock_live_group(cgrp)) return -ENODEV; - mutex_lock(&cgroup_root_mutex); - strcpy(cgrp->root->release_agent_path, buffer); - mutex_unlock(&cgroup_root_mutex); + root_cgrp = cgroup_get_local_root(cgrp); + BUG_ON(!root_cgrp); + if (root_cgrp->ve_owner) + ret = ve_set_release_agent_path(root_cgrp, buffer); + else + return -ENODEV; + mutex_unlock(&cgroup_mutex); - return 0; + return ret; } static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft, struct seq_file *seq) { + const char *release_agent; + struct cgroup *root_cgrp; + if (!cgroup_lock_live_group(cgrp)) return -ENODEV; - seq_puts(seq, cgrp->root->release_agent_path); + + root_cgrp = cgroup_get_local_root(cgrp); + if (root_cgrp->ve_owner) { + rcu_read_lock(); + release_agent = ve_get_release_agent_path(root_cgrp); + + if (release_agent) + seq_puts(seq, release_agent); + rcu_read_unlock(); + } seq_putc(seq, '\n'); mutex_unlock(&cgroup_mutex); return 0; @@ -5532,15 +5583,24 @@ static void check_for_release(struct cgroup *cgrp) void cgroup_release_agent(struct work_struct *work) { struct ve_struct *ve; + char *agentbuf; + + agentbuf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!agentbuf) { + pr_warn("failed to allocate agentbuf\n"); + return; + } + ve = container_of(work, struct ve_struct, release_agent_work); mutex_lock(&cgroup_mutex); raw_spin_lock(&ve->release_list_lock); while (!list_empty(&ve->release_list)) { char *argv[3], *envp[3]; int i, err; - char *pathbuf = NULL, *agentbuf = NULL; + char *pathbuf = NULL; struct cgroup *cgrp, *root_cgrp; struct task_struct *ve_task; + const char *release_agent; cgrp = list_entry(ve->release_list.next, struct cgroup, @@ -5568,9 +5628,15 @@ void 
cgroup_release_agent(struct work_struct *work) rcu_read_unlock(); goto continue_free; } + + release_agent = ve_get_release_agent_path(root_cgrp); + + *agentbuf = 0; + if (release_agent) + strncpy(agentbuf, release_agent, PATH_MAX); rcu_read_unlock(); - agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL); - if (!agentbuf) + + if (!*agentbuf) goto continue_free; i = 0; @@ -5601,11 +5667,11 @@ void cgroup_release_agent(struct work_struct *work) mutex_lock(&cgroup_mutex); continue_free: kfree(pathbuf); - kfree(agentbuf); raw_spin_lock(&ve->release_list_lock); } raw_spin_unlock(&ve->release_list_lock); mutex_unlock(&cgroup_mutex); + kfree(agentbuf); } static int __init cgroup_disable(char *str) diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c index f564dca..f03f665 100644 --- a/kernel/ve/ve.c +++ b/kernel/ve/ve.c @@ -51,6 +51,11 @@ struct per_cgroot_data { * data is related to this cgroup */ struct cgroup *cgroot; + /* + * path to release agent binary that should + * be spawned for all cgroups under this cgroup root + */ + struct cgroup_rcu_string __rcu *release_agent_path; }; extern struct kmapset_set sysfs_ve_perms_set; @@ -175,6 +180,68 @@ static inline struct per_cgroot_data *per_cgroot_get_or_create( return data; } +int ve_set_release_agent_path(struct cgroup *cgroot, + const char *release_agent) +{ + struct ve_struct *ve; + struct per_cgroot_data *data; + struct cgroup_rcu_string *new_path, *old_path; + int err = 0; + + /* + * caller should grab cgroup_mutex to safely use + * ve_owner field + */ + ve = cgroot->ve_owner; + BUG_ON(!ve); + + new_path = cgroup_rcu_strdup(release_agent, strlen(release_agent)); + if (IS_ERR(new_path)) + return PTR_ERR(new_path); + + data = per_cgroot_get_or_create(ve, cgroot); + if (IS_ERR(data)) { + kfree(new_path); + return PTR_ERR(data); + } + + raw_spin_lock(&ve->per_cgroot_list_lock); + + old_path = rcu_dereference_protected(data->release_agent_path, + lockdep_is_held(&ve->per_cgroot_list_lock)); + + 
rcu_assign_pointer(data->release_agent_path, new_path); + raw_spin_unlock(&ve->per_cgroot_list_lock); + + if (old_path) + kfree_rcu(old_path, rcu_head); + + return err; +} + +const char *ve_get_release_agent_path(struct cgroup *cgroot) +{ + /* caller must grab rcu_read_lock */ + const char *result = NULL; + struct per_cgroot_data *data; + struct cgroup_rcu_string *str; + struct ve_struct *ve; + ve = rcu_dereference(cgroot->ve_owner); + if (!ve) + return NULL; + + raw_spin_lock(&ve->per_cgroot_list_lock); + + data = per_cgroot_data_find_locked(&ve->per_cgroot_list, cgroot); + if (data) { + str = rcu_dereference(data->release_agent_path); + if (str) + result = str->val; + } + raw_spin_unlock(&ve->per_cgroot_list_lock); + return result; +} + struct cgroup_subsys_state *ve_get_init_css(struct ve_struct *ve, int subsys_id) { struct cgroup_subsys_state *css, *tmp; @@ -677,9 +744,14 @@ err_list: static void ve_per_cgroot_free(struct ve_struct *ve) { struct per_cgroot_data *data, *saved; + struct cgroup_rcu_string *release_agent; raw_spin_lock(&ve->per_cgroot_list_lock); list_for_each_entry_safe(data, saved, &ve->per_cgroot_list, list) { + release_agent = data->release_agent_path; + RCU_INIT_POINTER(data->release_agent_path, NULL); + if (release_agent) + kfree_rcu(release_agent, rcu_head); list_del_init(&data->list); kfree(data); }