[rh7,2/2] bc: implement ubc "numproc" limit based on pids cgroup

Submitted by Konstantin Khorenko on June 25, 2018, 3:24 p.m.

Details

Message ID 20180625152418.10541-3-khorenko@virtuozzo.com
State New
Series "bc: rework "numproc" ubc limit to be based on pids cgroup"
Headers show

Commit Message

Konstantin Khorenko June 25, 2018, 3:24 p.m.
Wire pids cgroup into Container's ub.
All the real work of accounting and limiting threads (pids) is done by
the pids cgroup; the corresponding counters are simply synced into the
beancounter whenever /proc/user_beancounters or /proc/bc/$CTID/resources
is read.

The pids cgroup lacks a fail counter, so add one.

Note: vzctl must configure
/sys/fs/cgroup/beancounter/$CTID/beancounter.pids with the path to
Container's pids cgroup like it does for beancounter.{memory,blkio}.
Usually the path looks like "/machine.slice/$CTID".

https://jira.sw.ru/browse/PSBM-86044

Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
---
 include/bc/beancounter.h | 10 ++++++++++
 kernel/bc/beancounter.c  | 41 +++++++++++++++++++++++++++++++++++++++++
 kernel/bc/proc.c         |  2 ++
 kernel/cgroup_pids.c     | 38 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 91 insertions(+)

Patch hide | download patch | download mbox

diff --git a/include/bc/beancounter.h b/include/bc/beancounter.h
index 8a371f20736b..d4ac3259930a 100644
--- a/include/bc/beancounter.h
+++ b/include/bc/beancounter.h
@@ -82,6 +82,7 @@  struct ub_percpu_struct {
 enum {
 	UB_MEM_CGROUP,
 	UB_BLKIO_CGROUP,
+	UB_PIDS_CGROUP,
 	NR_UB_BOUND_CGROUPS,
 };
 
@@ -145,6 +146,12 @@  ub_get_blkio_css(struct user_beancounter *ub)
 	return __ub_get_css(ub, UB_BLKIO_CGROUP);
 }
 
+static inline struct cgroup_subsys_state *
+ub_get_pids_css(struct user_beancounter *ub)
+{
+	return __ub_get_css(ub, UB_PIDS_CGROUP);
+}
+
 static inline int ub_barrier_hit(struct user_beancounter *ub, int resource)
 {
 	return ub->ub_parms[resource].held > ub->ub_parms[resource].barrier;
@@ -288,6 +295,9 @@  extern int ub_update_memcg(struct user_beancounter *ub);
 extern void ub_sync_memcg(struct user_beancounter *ub);
 extern unsigned long ub_total_pages(struct user_beancounter *ub, bool swap);
 
+extern void ub_sync_pids(struct user_beancounter *ub);
+extern struct pids_cgroup *pids_cgroup_from_cont(struct cgroup *cont);
+
 extern const char *ub_rnames[];
 /*
  *	Put a beancounter reference
diff --git a/kernel/bc/beancounter.c b/kernel/bc/beancounter.c
index 31e5904df708..4c6d1b7f5f25 100644
--- a/kernel/bc/beancounter.c
+++ b/kernel/bc/beancounter.c
@@ -89,6 +89,7 @@  static struct vfsmount *ub_bound_cgroup_mnt[NR_UB_BOUND_CGROUPS];
 
 #define mem_cgroup_mnt		(ub_bound_cgroup_mnt[UB_MEM_CGROUP])
 #define blkio_cgroup_mnt	(ub_bound_cgroup_mnt[UB_BLKIO_CGROUP])
+#define pids_cgroup_mnt		(ub_bound_cgroup_mnt[UB_PIDS_CGROUP])
 
 static void __ub_set_css(struct user_beancounter *ub, int idx,
 			 struct cgroup_subsys_state *css)
@@ -154,6 +155,12 @@  static void ub_set_blkio_css(struct user_beancounter *ub,
 	__ub_set_css(ub, UB_BLKIO_CGROUP, css);
 }
 
+static void ub_set_pids_css(struct user_beancounter *ub,
+			     struct cgroup_subsys_state *css)
+{
+	__ub_set_css(ub, UB_PIDS_CGROUP, css);
+}
+
 /*
  * Used to attach a task to a beancounter in the legacy API.
  */
@@ -199,6 +206,8 @@  extern void mem_cgroup_get_nr_pages(struct mem_cgroup *memcg, int nid,
 				    unsigned long *pages);
 extern unsigned long mem_cgroup_total_pages(struct mem_cgroup *memcg,
 					    bool swap);
+extern void pids_cgroup_sync_beancounter(struct pids_cgroup *pids,
+					 struct user_beancounter *ub);
 
 /*
  * Update memcg limits according to beancounter configuration.
@@ -215,6 +224,18 @@  int ub_update_memcg(struct user_beancounter *ub)
 	return ret;
 }
 
+/*
+ * Synchronize pids cgroup stats with beancounter.
+ */
+void ub_sync_pids(struct user_beancounter *ub)
+{
+	struct cgroup_subsys_state *css;
+
+	css = ub_get_pids_css(ub);
+	pids_cgroup_sync_beancounter(pids_cgroup_from_cont(css->cgroup), ub);
+	css_put(css);
+}
+
 /*
  * Synchronize memcg stats with beancounter.
  */
@@ -644,6 +665,10 @@  static int ub_cgroup_write(struct cgroup *cg, struct cftype *cft,
 		ub_set_blkio_css(ub, cgroup_subsys_state(bound_cg,
 					blkio_subsys_id));
 		break;
+	case UB_PIDS_CGROUP:
+		ub_set_pids_css(ub, cgroup_subsys_state(bound_cg,
+					pids_subsys_id));
+		break;
 	}
 
 	cgroup_kernel_close(bound_cg);
@@ -665,6 +690,13 @@  static struct cftype ub_cgroup_files[] = {
 		.write_string = ub_cgroup_write,
 		.read = ub_cgroup_read,
 	},
+	{
+		.name = "pids",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.private = UB_PIDS_CGROUP,
+		.write_string = ub_cgroup_write,
+		.read = ub_cgroup_read,
+	},
 	{ },	/* terminate */
 };
 
@@ -1169,6 +1201,7 @@  void __init ub_init_late(void)
 {
 	ub_set_mem_css(&ub0, task_subsys_state_check(&init_task,  mem_cgroup_subsys_id, true));
 	ub_set_blkio_css(&ub0, task_subsys_state_check(&init_task, blkio_subsys_id, true));
+	ub_set_pids_css(&ub0, task_subsys_state_check(&init_task, pids_subsys_id, true));
 
 	register_sysctl_table(ub_sysctl_root);
 }
@@ -1181,6 +1214,9 @@  int __init ub_init_cgroup(void)
 	struct cgroup_sb_opts mem_opts = {
 		.subsys_mask    = (1ul << mem_cgroup_subsys_id),
 	};
+	struct cgroup_sb_opts pids_opts = {
+		.subsys_mask    = (1ul << pids_subsys_id),
+	};
 	struct cgroup_sb_opts ub_opts = {
 		.subsys_mask	= (1ul << ub_subsys_id),
 	};
@@ -1195,6 +1231,11 @@  int __init ub_init_cgroup(void)
 		panic("Failed to mount memory cgroup: %ld\n",
 		      PTR_ERR(mem_cgroup_mnt));
 
+	pids_cgroup_mnt = cgroup_kernel_mount(&pids_opts);
+	if (IS_ERR(pids_cgroup_mnt))
+		panic("Failed to mount pids cgroup: %ld\n",
+		      PTR_ERR(pids_cgroup_mnt));
+
 	ub_cgroup_mnt = cgroup_kernel_mount(&ub_opts);
 	if (IS_ERR(ub_cgroup_mnt))
 		panic("Failed to mount beancounter cgroup: %ld\n",
diff --git a/kernel/bc/proc.c b/kernel/bc/proc.c
index 9f60d9991e0a..e5aef9c11001 100644
--- a/kernel/bc/proc.c
+++ b/kernel/bc/proc.c
@@ -71,6 +71,7 @@  static void __show_resources(struct seq_file *f, struct user_beancounter *ub,
 	int i, precharge[UB_RESOURCES];
 
 	ub_sync_memcg(ub);
+	ub_sync_pids(ub);
 	ub_precharge_snapshot(ub, precharge);
 
 	for (i = 0; i < UB_RESOURCES_COMPAT; i++)
@@ -180,6 +181,7 @@  static int ub_show(struct seq_file *f, void *v)
 	struct user_beancounter *ub = v;
 
 	ub_sync_memcg(ub);
+	ub_sync_pids(ub);
 	ub_precharge_snapshot(ub, precharge);
 
 	for (i = 0; i < UB_RESOURCES_COMPAT; i++)
diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c
index 610f8df7a25c..465de7ebd817 100644
--- a/kernel/cgroup_pids.c
+++ b/kernel/cgroup_pids.c
@@ -50,8 +50,43 @@  struct pids_cgroup {
 	 */
 	atomic64_t			counter;
 	int64_t				limit;
+#ifdef CONFIG_BEANCOUNTERS
+	/* beancounter-related stats */
+	atomic_long_t pids_failcnt;
+#endif /* CONFIG_BEANCOUNTERS */
 };
 
+#ifdef CONFIG_BEANCOUNTERS
+static inline
+struct pids_cgroup *pids_cgroup_from_css(struct cgroup_subsys_state *s)
+{
+	return container_of(s, struct pids_cgroup, css);
+}
+
+struct pids_cgroup *pids_cgroup_from_cont(struct cgroup *cont)
+{
+	return pids_cgroup_from_css(
+			cgroup_subsys_state(cont, pids_subsys_id));
+}
+
+#include <bc/beancounter.h>
+void pids_cgroup_sync_beancounter(struct pids_cgroup *pids,
+				  struct user_beancounter *ub)
+{
+	unsigned long lim;
+	volatile struct ubparm *p;
+
+	p = &ub->ub_parms[UB_NUMPROC];
+	p->held = p->maxheld = (unsigned long)atomic64_read(&pids->counter);
+	p->failcnt = atomic_long_read(&pids->pids_failcnt);
+
+	lim = pids->limit;
+	lim = lim >= PIDS_MAX ? UB_MAXVALUE :
+		min_t(unsigned long, lim, UB_MAXVALUE);
+	p->barrier = p->limit = lim;
+}
+#endif /* CONFIG_BEANCOUNTERS */
+
 static struct pids_cgroup *css_pids(struct cgroup_subsys_state *css)
 {
 	return container_of(css, struct pids_cgroup, css);
@@ -164,6 +199,9 @@  static int pids_try_charge(struct pids_cgroup *pids, int num)
 	for (q = pids; q != p; q = parent_pids(q))
 		pids_cancel(q, num);
 	pids_cancel(p, num);
+#ifdef CONFIG_BEANCOUNTERS
+	atomic_long_inc(&pids->pids_failcnt);
+#endif /* CONFIG_BEANCOUNTERS */
 
 	return -EAGAIN;
 }