[2/2] Dump/restore start_time param from /proc/[pid]/stat for each task

Submitted by Valeriy Vdovin on Dec. 31, 2019, 11:41 a.m.

Details

Message ID 1577792473-23925-3-git-send-email-valeriy.vdovin@virtuozzo.com
State New
Series "Start time of a task inside a container."
Headers show

Commit Message

Valeriy Vdovin Dec. 31, 2019, 11:41 a.m.
https://jira.sw.ru/browse/PSBM-100083
Signed-off-by: Valeriy Vdovin <valeriy.vdovin@virtuozzo.com>
---
 criu/cr-dump.c    | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 criu/cr-restore.c | 40 ++++++++++++++++++++++++++++++++++++++++
 images/core.proto |  2 ++
 3 files changed, 91 insertions(+)

Patch hide | download patch | download mbox

diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index 45626e8..859cfa9 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1037,6 +1037,47 @@  int dump_thread_core(int pid, CoreEntry *core, const struct parasite_dump_thread
 	return ret;
 }
 
+struct get_internal_start_time_rq {
+	int pid;
+	unsigned long long result;
+};
+
+static int child_get_internal_start_time(void *arg)
+{
+	struct proc_pid_stat p;
+	struct get_internal_start_time_rq *r =
+		(struct get_internal_start_time_rq *)arg;
+
+	/* We need to join ve to access container relative
+	 * value of task's start_time, otherwise we will see
+	 * start_time visible to host.
+	 */
+	join_ve(r->pid, true);
+
+	parse_pid_stat(r->pid, &p);
+	r->result = p.start_time;
+	return 0;
+}
+
+static int dump_task_internal_start_time(int pid, TaskCoreEntry *tc)
+{
+	int ret;
+	struct get_internal_start_time_rq r = {
+		.pid = pid,
+		.result = 0
+	};
+
+	ret = call_in_child_process(child_get_internal_start_time, &r);
+	if (ret) {
+		pr_err("Failed to exec in child\n");
+		return ret;
+	}
+
+	tc->has_start_time = 1;
+	tc->start_time = r.result;
+	return 0;
+}
+
 static int dump_task_core_all(struct parasite_ctl *ctl,
 			      struct pstree_item *item,
 			      const struct proc_pid_stat *stat,
@@ -1063,6 +1104,14 @@  static int dump_task_core_all(struct parasite_ctl *ctl,
 	core->tc->task_state = item->pid->state;
 	core->tc->exit_code = 0;
 
+	ret = dump_task_internal_start_time(pid, core->tc);
+	if (ret) {
+		pr_err("Failed to dump start_time for task %d\n", pid);
+		goto err;
+	}
+
+	pr_info("Dumped start_time of task %d is %lu\n", pid, core->tc->start_time);
+
 	if (stat->tty_nr) {
 		struct pstree_item *p = item;
 
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 170beab..fe04d67 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -947,6 +947,42 @@  static int prepare_proc_misc(pid_t pid, TaskCoreEntry *tc)
 static int prepare_itimers(int pid, struct task_restore_args *args, CoreEntry *core);
 static int prepare_mm(pid_t pid, struct task_restore_args *args);
 
+static int restore_start_time(int pid, CoreEntry *core)
+{
+	unsigned long long total_nsec;
+	unsigned long flags;
+	long tps;
+	struct prctl_task_ct_fields ct_fields;
+
+	if (!core) {
+		pr_err("Skipping restore_start_time for pid %d, core is NULL\n", pid);
+		return -1;
+	}
+
+	if (!core->tc->has_start_time) {
+		pr_warn("Skipping restore_start_time for old image version.\n");
+		return -1;
+	}
+
+	tps = sysconf(_SC_CLK_TCK);
+	if (tps == -1) {
+		pr_perror("Failed to get clock ticks via sysconf");
+		return -1;
+	}
+
+	total_nsec = core->tc->start_time * (NSEC_PER_SEC / tps);
+
+	ct_fields.real_start_time = total_nsec;
+	flags = PR_TASK_CT_FIELDS_START_TIME;
+
+	if (prctl(PR_SET_TASK_CT_FIELDS, (unsigned long)&ct_fields, flags, 0, 0)) {
+		pr_perror("Can't set process start time");
+		return -1;
+	}
+
+	return 0;
+}
+
 static int restore_one_alive_task(int pid, CoreEntry *core)
 {
 	unsigned args_len;
@@ -955,6 +991,10 @@  static int restore_one_alive_task(int pid, CoreEntry *core)
 
 	rst_mem_switch_to_private();
 
+	restore_start_time(pid, core);
+	pr_info("Restored start_time of task %d is %lu\n",
+		pid, core->tc->start_time);
+
 	args_len = round_up(sizeof(*ta) + sizeof(struct thread_restore_args) *
 			current->nr_threads, page_size());
 	ta = mmap(NULL, args_len, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
diff --git a/images/core.proto b/images/core.proto
index 6ef5f50..c2b6099 100644
--- a/images/core.proto
+++ b/images/core.proto
@@ -50,6 +50,7 @@  message task_core_entry_VZ730 {
 	optional int32			tty_nr		= 15;
 	optional int32			tty_pgrp	= 16;
 	repeated sa_entry		sigactions	= 17;
+	optional uint64			start_time	= 18;
 }
 
 message task_core_entry {
@@ -79,6 +80,7 @@  message task_core_entry {
 	repeated sa_entry		sigactions	= 15;
 	optional int32			tty_nr		= 16;
 	optional int32			tty_pgrp	= 17;
+	optional uint64			start_time	= 18;
 }
 
 message task_kobj_ids_entry {

Comments

Pavel Tikhomirov Jan. 9, 2020, 1:24 p.m.
On 12/31/19 2:41 PM, Valeriy Vdovin wrote:
> https://jira.sw.ru/browse/PSBM-100083
> Signed-off-by: Valeriy Vdovin <valeriy.vdovin@virtuozzo.com>
> ---
>   criu/cr-dump.c    | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
>   criu/cr-restore.c | 40 ++++++++++++++++++++++++++++++++++++++++
>   images/core.proto |  2 ++
>   3 files changed, 91 insertions(+)
> 
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> index 45626e8..859cfa9 100644
> --- a/criu/cr-dump.c
> +++ b/criu/cr-dump.c
> @@ -1037,6 +1037,47 @@ int dump_thread_core(int pid, CoreEntry *core, const struct parasite_dump_thread
>   	return ret;
>   }
>   
> +struct get_internal_start_time_rq {
> +	int pid;
> +	unsigned long long result;
> +};
> +
> +static int child_get_internal_start_time(void *arg)
> +{
> +	struct proc_pid_stat p;
> +	struct get_internal_start_time_rq *r =
> +		(struct get_internal_start_time_rq *)arg;
> +
> +	/* We need to join ve to access container relative
> +	 * value of task's start_time, otherwize we will see
> +	 * start_time visible to host.
> +	 */
> +	join_ve(r->pid, true);

join_veX ?

> +
> +	parse_pid_stat(r->pid, &p);
> +	r->result = p.start_time;
> +	return 0;
> +}
> +
> +static int dump_task_internal_start_time(int pid, TaskCoreEntry *tc)
> +{
> +	int ret;
> +	struct get_internal_start_time_rq r = {
> +		.pid = pid,
> +		.result = 0
> +	};
> +
> +	ret = call_in_child_process(child_get_internal_start_time, &r);
> +	if (ret) {
> +		pr_err("Failed to exec in child\n");
> +		return ret;
> +	}
> +
> +	tc->has_start_time = 1;
> +	tc->start_time = r.result;
> +	return 0;
> +}
> +
>   static int dump_task_core_all(struct parasite_ctl *ctl,
>   			      struct pstree_item *item,
>   			      const struct proc_pid_stat *stat,
> @@ -1063,6 +1104,14 @@ static int dump_task_core_all(struct parasite_ctl *ctl,
>   	core->tc->task_state = item->pid->state;
>   	core->tc->exit_code = 0;
>   
> +	ret = dump_task_internal_start_time(pid, core->tc);
> +	if (ret) {
> +		pr_err("Failed to dump start_time for task %d\n", pid);
> +		goto err;
> +	}
> +
> +	pr_info("Dumped start_time of task %d is %lu\n", pid, core->tc->start_time);
> +
>   	if (stat->tty_nr) {
>   		struct pstree_item *p = item;
>   
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index 170beab..fe04d67 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -947,6 +947,42 @@ static int prepare_proc_misc(pid_t pid, TaskCoreEntry *tc)
>   static int prepare_itimers(int pid, struct task_restore_args *args, CoreEntry *core);
>   static int prepare_mm(pid_t pid, struct task_restore_args *args);
>   
> +static int restore_start_time(int pid, CoreEntry *core)
> +{
> +	unsigned long long total_nsec;
> +	unsigned long flags;
> +	long tps;
> +	struct prctl_task_ct_fields ct_fields;
> +
> +	if (!core) {
> +		pr_err("Skipping restore_start_time for pid %d, core is NULL\n", pid);
> +		return -1;
> +	}

I doubt that we need this core check; if you know why we need it, please 
explain.

> +
> +	if (!core->tc->has_start_time) {
> +		pr_warn("Skipping restore_start_time for old image version.\n");
> +		return -1;
> +	}
> +
> +	tps = sysconf(_SC_CLK_TCK);
> +	if (tps == -1) {
> +		pr_perror("Failed to get clock ticks via sysconf");
> +		return -1;
> +	}
> +
> +	total_nsec = core->tc->start_time * (NSEC_PER_SEC / tps);
> +
> +	ct_fields.real_start_time = total_nsec;
> +	flags = PR_TASK_CT_FIELDS_START_TIME;
> +
> +	if (prctl(PR_SET_TASK_CT_FIELDS, (unsigned long)&ct_fields, flags, 0, 0)) {
> +		pr_perror("Can't set process start time");
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
>   static int restore_one_alive_task(int pid, CoreEntry *core)
>   {
>   	unsigned args_len;
> @@ -955,6 +991,10 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
>   
>   	rst_mem_switch_to_private();
>   
> +	restore_start_time(pid, core);

We discussed that zombie processes also need to have start_time restored; 
it looks like that is still missing here.

[snorch@snorch ~]$ ps axfww -o state,lstart | grep Z
Z Thu Jan  9 16:12:59 2020

Please add the same to restore_one_zombie.

> +	pr_info("Restored start_time of task %d is %lu\n",
> +		pid, core->tc->start_time);
> +
>   	args_len = round_up(sizeof(*ta) + sizeof(struct thread_restore_args) *
>   			current->nr_threads, page_size());
>   	ta = mmap(NULL, args_len, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
> diff --git a/images/core.proto b/images/core.proto
> index 6ef5f50..c2b6099 100644
> --- a/images/core.proto
> +++ b/images/core.proto
> @@ -50,6 +50,7 @@ message task_core_entry_VZ730 {
>   	optional int32			tty_nr		= 15;
>   	optional int32			tty_pgrp	= 16;
>   	repeated sa_entry		sigactions	= 17;
> +	optional uint64			start_time	= 18;

In ms criu, 18 is already used:

        optional bool                   child_subreaper = 18;

Please send a patch to ms criu to reserve an id for start_time. See ms 
commit 69f859436 ("image: core -- Reserve tty fields") for reference.

>   }
>   
>   message task_core_entry {
> @@ -79,6 +80,7 @@ message task_core_entry {
>   	repeated sa_entry		sigactions	= 15;
>   	optional int32			tty_nr		= 16;
>   	optional int32			tty_pgrp	= 17;
> +	optional uint64			start_time	= 18;
>   }
>   
>   message task_kobj_ids_entry {
>