[2/2,v1] ve/cgroup: Added pointers to owning ve to root cgroups

Submitted by Valeriy Vdovin on March 17, 2020, 10:28 a.m.

Details

Message ID 1584440895-770414-3-git-send-email-valeriy.vdovin@virtuozzo.com
State New
Series "Make release_agent per-cgroup property. Run release_agent in proper ve."
Headers show

Commit Message

Valeriy Vdovin March 17, 2020, 10:28 a.m.
Follow-up patch to per-cgroup release_agent property. release_agent
notifications are spawned from a special kthread, running under ve0. But
newly spawned tasks should run under their own ve context. Easy way to
pass this information to a spawning thread is by adding 've_owner' field
to a root cgroup. At notification any cgroup can be walked upwards to
its root and get ve_owner from there.

https://jira.sw.ru/browse/PSBM-83887
Signed-off-by: Valeriy Vdovin <valeriy.vdovin@virtuozzo.com>
---
 include/linux/cgroup.h |  3 +++
 include/linux/ve.h     |  8 ++++++++
 kernel/cgroup.c        | 33 +++++++++++++++++++++++++++++++++
 kernel/ve/ve.c         |  3 +++
 4 files changed, 47 insertions(+)

Patch hide | download patch | download mbox

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index cad5b4f..513658b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -286,6 +286,9 @@  struct cgroup {
 	struct simple_xattrs xattrs;
 	u64 subgroups_limit;
 
+	/* ve_owner, responsible for running release agent. */
+	struct ve_struct *ve_owner;
+
 	/*
 	 * Per-cgroup path to release agent binary for release
 	 * notifications.
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 9d60838..9cc5257 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -268,6 +268,14 @@  static inline struct cgroup *cgroup_get_ve_root(struct cgroup *cgrp)
 struct seq_file;
 struct kernel_cpustat;
 
+/*
+ * cgroup needs to know its owning ve for some operations, but
+ * cgroup's lifetime is independent of ve's; in theory a ve can be destroyed
+ * earlier than some of its cgroups.
+ */
+void ve_add_referring_cgroup(struct ve_struct *ve, struct cgroup *cgrp);
+void ve_remove_referring_cgroups(struct ve_struct *ve);
+
 #if defined(CONFIG_VE) && defined(CONFIG_CGROUP_SCHED)
 int ve_show_cpu_stat(struct ve_struct *ve, struct seq_file *p);
 int ve_show_loadavg(struct ve_struct *ve, struct seq_file *p);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0b64d88..105536b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4318,6 +4318,7 @@  int cgroup_mark_ve_root(struct ve_struct *ve)
 	mutex_lock(&cgroup_mutex);
 	for_each_active_root(root) {
 		cgrp = task_cgroup_from_root(ve->init_task, root);
+		cgrp->ve_owner = ve;
 		set_bit(CGRP_VE_ROOT, &cgrp->flags);
 		err = cgroup_add_file_on_mark_ve(cgrp);
 		if (err)
@@ -4329,6 +4330,19 @@  int cgroup_mark_ve_root(struct ve_struct *ve)
 	return err;
 }
 
+void cgroup_unbind_roots_from_ve(struct ve_struct *ve)
+{
+	struct cgroup *cgrp;
+	struct cgroupfs_root *root;
+
+	mutex_lock(&cgroup_mutex);
+	for_each_active_root(root) {
+		cgrp = task_cgroup_from_root(ve->init_task, root);
+		cgrp->ve_owner = NULL;
+	}
+	mutex_unlock(&cgroup_mutex);
+}
+
 struct cgroup *cgroup_get_ve_root(struct cgroup *cgrp)
 {
 	struct cgroup *ve_root = NULL;
@@ -5455,6 +5469,7 @@  static void cgroup_release_agent(struct work_struct *work)
 	raw_spin_lock(&release_list_lock);
 	while (!list_empty(&release_list)) {
 		char *argv[3], *envp[3];
+		struct ve_struct *ve;
 		int i, err;
 		char *pathbuf = NULL, *agentbuf = NULL;
 		struct cgroup *root_cgrp;
@@ -5468,7 +5483,20 @@  static void cgroup_release_agent(struct work_struct *work)
 			goto continue_free;
 		if (__cgroup_path(cgrp, pathbuf, PAGE_SIZE, true) < 0)
 			goto continue_free;
+
+		/*
+		 * root_cgrp is the relative root for cgrp: for host
+		 * cgroups root_cgrp is root->top_cgroup; for container
+		 * cgroups it is the ancestor up the parent chain from
+		 * cgrp that is marked as VE_ROOT.
+		 */
 		root_cgrp = cgroup_get_local_root(cgrp);
+
+		ve = NULL;
+		if (root_cgrp->ve_owner)
+			ve = root_cgrp->ve_owner;
+		if (!ve)
+			goto continue_free;
 		if (root_cgrp->release_agent_path)
 			agentbuf = kstrdup(root_cgrp->release_agent_path,
 				GFP_KERNEL);
@@ -5490,7 +5518,12 @@  static void cgroup_release_agent(struct work_struct *work)
 		 * since the exec could involve hitting disk and hence
 		 * be a slow process */
 		mutex_unlock(&cgroup_mutex);
+#ifdef CONFIG_VE
+		err = call_usermodehelper_fns_ve(ve, argv[0], argv,
+			envp, UMH_WAIT_EXEC, NULL, NULL, NULL);
+#else
 		err = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
+#endif
 		if (err < 0)
 			pr_warn_ratelimited("cgroup release_agent "
 					    "%s %s failed: %d\n",
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index a64b4a7..37353fb 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -480,6 +480,7 @@  static void ve_drop_context(struct ve_struct *ve)
 static const struct timespec zero_time = { };
 
 extern int cgroup_mark_ve_root(struct ve_struct *ve);
+extern void cgroup_unbind_roots_from_ve(struct ve_struct *ve);
 
 /* under ve->op_sem write-lock */
 static int ve_start_container(struct ve_struct *ve)
@@ -588,10 +589,12 @@  void ve_stop_ns(struct pid_namespace *pid_ns)
 	up_write(&ve->op_sem);
 }
 
 void ve_exit_ns(struct pid_namespace *pid_ns)
 {
 	struct ve_struct *ve = current->task_ve;
 
+	cgroup_unbind_roots_from_ve(ve);
 	/*
 	 * current->cgroups already switched to init_css_set in cgroup_exit(),
 	 * but current->task_ve still points to our exec ve.
-- 
1.8.3.1

Comments

Pavel Tikhomirov March 17, 2020, 11:10 a.m.
On 3/17/20 1:28 PM, Valeriy Vdovin wrote:
> Follow-up patch to per-cgroup release_agent property. release_agent
> notifications are spawned from a special kthread, running under ve0. But
> newly spawned tasks should run under their own ve context. Easy way to
> pass this information to a spawning thread is by adding 've_owner' field
> to a root cgroup. At notification any cgroup can be walked upwards to
> its root and get ve_owner from there.
> 
> https://jira.sw.ru/browse/PSBM-83887
> Signed-off-by: Valeriy Vdovin <valeriy.vdovin@virtuozzo.com>
> ---
>   include/linux/cgroup.h |  3 +++
>   include/linux/ve.h     |  8 ++++++++
>   kernel/cgroup.c        | 33 +++++++++++++++++++++++++++++++++
>   kernel/ve/ve.c         |  3 +++
>   4 files changed, 47 insertions(+)
> 
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index cad5b4f..513658b 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -286,6 +286,9 @@ struct cgroup {
>   	struct simple_xattrs xattrs;
>   	u64 subgroups_limit;
>   
> +	/* ve_owner, responsible for running release agent. */
> +	struct ve_struct *ve_owner;
> +
>   	/*
>   	 * Per-cgroup path to release agent binary for release
>   	 * notifications.
> diff --git a/include/linux/ve.h b/include/linux/ve.h
> index 9d60838..9cc5257 100644
> --- a/include/linux/ve.h
> +++ b/include/linux/ve.h
> @@ -268,6 +268,14 @@ static inline struct cgroup *cgroup_get_ve_root(struct cgroup *cgrp)
>   struct seq_file;
>   struct kernel_cpustat;
>   
> +/*
> + * cgroup needs to know its owning ve for some operations, but
> + * cgroup's lifetime is independent of ve's; in theory a ve can be destroyed
> + * earlier than some of its cgroups.
> + */
> +void ve_add_referring_cgroup(struct ve_struct *ve, struct cgroup *cgrp);
> +void ve_remove_referring_cgroups(struct ve_struct *ve);
> +
>   #if defined(CONFIG_VE) && defined(CONFIG_CGROUP_SCHED)
>   int ve_show_cpu_stat(struct ve_struct *ve, struct seq_file *p);
>   int ve_show_loadavg(struct ve_struct *ve, struct seq_file *p);
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 0b64d88..105536b 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -4318,6 +4318,7 @@ int cgroup_mark_ve_root(struct ve_struct *ve)
>   	mutex_lock(&cgroup_mutex);
>   	for_each_active_root(root) {
>   		cgrp = task_cgroup_from_root(ve->init_task, root);
> +		cgrp->ve_owner = ve;
>   		set_bit(CGRP_VE_ROOT, &cgrp->flags);
>   		err = cgroup_add_file_on_mark_ve(cgrp);
>   		if (err)
> @@ -4329,6 +4330,19 @@ int cgroup_mark_ve_root(struct ve_struct *ve)
>   	return err;
>   }
>   
> +void cgroup_unbind_roots_from_ve(struct ve_struct *ve)
> +{
> +	struct cgroup *cgrp;
> +	struct cgroupfs_root *root;
> +
> +	mutex_lock(&cgroup_mutex);
> +	for_each_active_root(root) {
> +		cgrp = task_cgroup_from_root(ve->init_task, root);
> +		cgrp->ve_owner = NULL;
> +	}
> +	mutex_unlock(&cgroup_mutex);
> +}
> +
>   struct cgroup *cgroup_get_ve_root(struct cgroup *cgrp)
>   {
>   	struct cgroup *ve_root = NULL;
> @@ -5455,6 +5469,7 @@ static void cgroup_release_agent(struct work_struct *work)
>   	raw_spin_lock(&release_list_lock);
>   	while (!list_empty(&release_list)) {
>   		char *argv[3], *envp[3];
> +		struct ve_struct *ve;
>   		int i, err;
>   		char *pathbuf = NULL, *agentbuf = NULL;
>   		struct cgroup *root_cgrp;
> @@ -5468,7 +5483,20 @@ static void cgroup_release_agent(struct work_struct *work)
>   			goto continue_free;
>   		if (__cgroup_path(cgrp, pathbuf, PAGE_SIZE, true) < 0)
>   			goto continue_free;
> +
> +		/*
> +		 * root_cgrp is the relative root for cgrp: for host
> +		 * cgroups root_cgrp is root->top_cgroup; for container
> +		 * cgroups it is the ancestor up the parent chain from
> +		 * cgrp that is marked as VE_ROOT.
> +		 */
>   		root_cgrp = cgroup_get_local_root(cgrp);
> +
> +		ve = NULL;
> +		if (root_cgrp->ve_owner)
> +			ve = root_cgrp->ve_owner;
> +		if (!ve)
> +			goto continue_free;
>   		if (root_cgrp->release_agent_path)
>   			agentbuf = kstrdup(root_cgrp->release_agent_path,
>   				GFP_KERNEL);
> @@ -5490,7 +5518,12 @@ static void cgroup_release_agent(struct work_struct *work)
>   		 * since the exec could involve hitting disk and hence
>   		 * be a slow process */
>   		mutex_unlock(&cgroup_mutex);
> +#ifdef CONFIG_VE
> +		err = call_usermodehelper_fns_ve(ve, argv[0], argv,
> +			envp, UMH_WAIT_EXEC, NULL, NULL, NULL);
> +#else
>   		err = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
> +#endif
>   		if (err < 0)
>   			pr_warn_ratelimited("cgroup release_agent "
>   					    "%s %s failed: %d\n",
> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> index a64b4a7..37353fb 100644
> --- a/kernel/ve/ve.c
> +++ b/kernel/ve/ve.c
> @@ -480,6 +480,7 @@ static void ve_drop_context(struct ve_struct *ve)
>   static const struct timespec zero_time = { };
>   
>   extern int cgroup_mark_ve_root(struct ve_struct *ve);
> +extern void cgroup_unbind_roots_from_ve(struct ve_struct *ve);
>   
>   /* under ve->op_sem write-lock */
>   static int ve_start_container(struct ve_struct *ve)
> @@ -588,10 +589,12 @@ void ve_stop_ns(struct pid_namespace *pid_ns)
>   	up_write(&ve->op_sem);
>   }
>   
>   void ve_exit_ns(struct pid_namespace *pid_ns)
>   {
>   	struct ve_struct *ve = current->task_ve;
>   
> +	cgroup_unbind_roots_from_ve(ve);
>   	/*
>   	 * current->cgroups already switched to init_css_set in cgroup_exit(),
>   	 * but current->task_ve still points to our exec ve.

This hunk looks broken and does not apply, should be something like:

@@ -595,6 +596,7 @@ void ve_stop_ns(struct pid_namespace *pid_ns)
  {
         struct ve_struct *ve = current->task_ve;

+       cgroup_unbind_roots_from_ve(ve);
         /*
          * current->cgroups already switched to init_css_set in 
cgroup_exit(),
          * but current->task_ve still points to our exec ve.


>